From da25f490a0b3d9c234d659b21024d61fe2d08c8e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 26 Oct 2017 11:43:34 +0300 Subject: [PATCH 001/406] microblaze: add missing include to mmu_context_mm.h mmu_context_mm.h is using struct task_struct, which is defined in linux/sched.h. Source files that include mm_context_mm.h (directly or indirectly) and doesn't include linux/sched.h will generate an error. An example of that is drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c This patch adds an include of linux/sched.h to mmu_context_mm.h to avoid such errors. Signed-off-by: Oded Gabbay Signed-off-by: Michal Simek --- arch/microblaze/include/asm/mmu_context_mm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h index 99472d2ca3404..97559fe0b9536 100644 --- a/arch/microblaze/include/asm/mmu_context_mm.h +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -13,6 +13,7 @@ #include #include +#include #include #include From 253696ccd613fbdaa5aba1de44c461a058e0a114 Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 10 Nov 2017 02:05:06 +0100 Subject: [PATCH 002/406] drm/vc4: Account for interrupts in flight Synchronously disable the IRQ to make the following cancel_work_sync invocation effective. An interrupt in flight could enqueue further overflow mem work. As we free the binner BO immediately following vc4_irq_uninstall this caused a NULL pointer dereference in the work callback vc4_overflow_mem_work. Link: https://github.com/anholt/linux/issues/114 Signed-off-by: Stefan Schake Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1510275907-993-2-git-send-email-stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 7d7af3a93d941..61b2e5377993d 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,6 +208,9 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + /* Undo the effects of a previous vc4_irq_uninstall. */ + enable_irq(dev->irq); + /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); @@ -225,6 +228,9 @@ vc4_irq_uninstall(struct drm_device *dev) /* Clear any pending interrupts we might have left. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + /* Finish any interrupt handler still in flight. */ + disable_irq(dev->irq); + cancel_work_sync(&vc4->overflow_mem_work); } From f82b735936ffd58b6711cf1f1054616517d8ffcd Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 23 Oct 2017 23:18:16 -0600 Subject: [PATCH 003/406] Btrfs: add write_flags for compression bio Compression code path has only flaged bios with REQ_OP_WRITE no matter where the bios come from, but it could be a sync write if fsync starts this writeback or a normal writeback write if wb kthread starts a periodic writeback. It breaks the rule that sync writes and writeback writes need to be differentiated from each other, because from the POV of block layer, all bios need to be recognized by these flags in order to do some management, e.g. throttlling. This passes writeback_control to compression write path so that it can send bios with proper flags to block layer. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 7 ++++--- fs/btrfs/compression.h | 3 ++- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 3 ++- fs/btrfs/inode.c | 15 +++++++++++---- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b35ce16b3df3c..4a78e57263373 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -295,7 +295,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, - unsigned long nr_pages) + unsigned long nr_pages, + unsigned int write_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct bio *bio = NULL; @@ -327,7 +328,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bdev = fs_info->fs_devices->latest_bdev; bio = btrfs_bio_alloc(bdev, first_byte); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; refcount_set(&cb->pending_bios, 1); @@ -374,7 +375,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_put(bio); bio = btrfs_bio_alloc(bdev, first_byte); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; bio_add_page(bio, page, PAGE_SIZE, 0); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index da20755ebf218..93c5b82ae97ec 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -91,7 +91,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, - unsigned long nr_pages); + unsigned long nr_pages, + unsigned int write_flags); blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dd941885b9c30..1dfd14678db86 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3252,7 +3252,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, delalloc_start, delalloc_end, &page_started, - nr_written); + nr_written, wbc); /* File system has been set read-only */ if (ret) { SetPageError(page); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 861dacb371c7f..d8b27af7101e6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -115,7 +115,8 @@ struct extent_io_ops { */ int (*fill_delalloc)(void *private_data, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written); + unsigned long *nr_written, + struct writeback_control *wbc); int (*writepage_start_hook)(struct page *page, u64 start, u64 end); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b93fe05a39c76..8525a44a607ea 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -378,6 +378,7 @@ struct async_cow { struct page *locked_page; u64 start; u64 end; + unsigned int write_flags; struct list_head extents; struct btrfs_work work; }; @@ -857,7 +858,8 @@ static noinline void submit_compressed_extents(struct inode *inode, async_extent->ram_size, ins.objectid, ins.offset, async_extent->pages, - async_extent->nr_pages)) { + async_extent->nr_pages, + async_cow->write_flags)) { struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; @@ -1191,7 +1193,8 @@ static noinline void async_cow_free(struct btrfs_work *work) static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written) + unsigned long *nr_written, + unsigned int write_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct async_cow *async_cow; @@ -1208,6 +1211,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->root = root; async_cow->locked_page = locked_page; async_cow->start = start; + async_cow->write_flags = write_flags; if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && !btrfs_test_opt(fs_info, FORCE_COMPRESS)) @@ -1577,11 +1581,13 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end) */ static int run_delalloc_range(void *private_data, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written) + unsigned long *nr_written, + struct writeback_control *wbc) { struct inode *inode = private_data; int ret; int force_cow = need_force_cow(inode, start, end); + unsigned int write_flags = wbc_to_write_flags(wbc); if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, @@ -1596,7 +1602,8 @@ static int run_delalloc_range(void *private_data, struct page *locked_page, set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags); ret = cow_file_range_async(inode, locked_page, start, end, - page_started, nr_written); + page_started, nr_written, + write_flags); } if (ret) btrfs_cleanup_ordered_extents(inode, start, end - start + 1); From 5e9f2ad5b2904a7e81df6d9a3dbef29478952eac Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 23 Oct 2017 09:58:46 +0300 Subject: [PATCH 004/406] btrfs: Fix transaction abort during failure in btrfs_rm_dev_item btrfs_rm_dev_item calls several function under an active transaction, however it fails to abort it if an error happens. Fix this by adding explicit btrfs_abort_transaction/btrfs_end_transaction calls. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f1ecb938ba4d7..84d37b4ab230f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1750,20 +1750,24 @@ static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info, key.offset = device->devid; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; - - if (ret > 0) { - ret = -ENOENT; + if (ret) { + if (ret > 0) + ret = -ENOENT; + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); goto out; } ret = btrfs_del_item(trans, root, path); - if (ret) - goto out; + if (ret) { + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + } + out: btrfs_free_path(path); - btrfs_commit_transaction(trans); + if (!ret) + ret = btrfs_commit_transaction(trans); return ret; } From 3065ae5b85654e9141b134bc7b07eb0f2ca7cfcf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Oct 2017 18:36:08 +0100 Subject: [PATCH 005/406] btrfs: add missing device::flush_bio puts This fixes potential bio leaks, in several error paths. Unfortunatelly the device structure freeing is opencoded in many places and I missed them when introducing the flush_bio. Most of the time, devices get freed through call_rcu(..., free_device), so it at least it's not that easy to hit the leak, but it's still possible through the path that frees stale devices. Fixes: e0ae99941423 ("btrfs: preallocate device flush bio") Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 84d37b4ab230f..b5c67dd5e5223 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -189,6 +189,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) struct btrfs_device, dev_list); list_del(&device->dev_list); rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); } kfree(fs_devices); @@ -578,6 +579,7 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev) fs_devs->num_devices--; list_del(&dev->dev_list); rcu_string_free(dev->name); + bio_put(dev->flush_bio); kfree(dev); } break; @@ -630,6 +632,7 @@ static noinline int device_list_add(const char *path, name = rcu_string_strdup(path, GFP_NOFS); if (!name) { + bio_put(device->flush_bio); kfree(device); return -ENOMEM; } @@ -742,6 +745,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) name = rcu_string_strdup(orig_dev->name->str, GFP_KERNEL); if (!name) { + bio_put(device->flush_bio); kfree(device); goto error; } @@ -807,6 +811,7 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step) list_del_init(&device->dev_list); fs_devices->num_devices--; rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); } @@ -2353,6 +2358,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { + bio_put(device->flush_bio); kfree(device); ret = -ENOMEM; goto error; @@ -2362,6 +2368,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); ret = PTR_ERR(trans); goto error; @@ -2505,6 +2512,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (trans) btrfs_end_transaction(trans); rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -2571,6 +2579,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { + bio_put(device->flush_bio); kfree(device); ret = -ENOMEM; goto error; @@ -6288,6 +6297,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, ret = find_next_devid(fs_info, &tmp); if (ret) { + bio_put(dev->flush_bio); kfree(dev); return ERR_PTR(ret); } From 619c47f3d4cd7a60576fd15e133a2eee4fcc0c4e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 19 Jun 2017 14:14:22 +0200 Subject: [PATCH 006/406] btrfs: dev_alloc_list is not protected by RCU, use normal list_del The dev_alloc_list list could be protected by various mutexes, depending on the context. The list tracks devices that can take part of allocating new chunks, so the closest mutex is chunk_mutex. Adding a new device from inside the ADD_DEV ioctl will need device_list_mutex and registering a new device from the ioctl needs uuid_mutex. All mutexes naturally guarantee exclusivity against the same context. The device ownership can move between the contexts and the exclusivity is guaranteed by other means, eg. during the mount with the uuid_mutex. There's no RCU involved for dev_alloc_list. Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b5c67dd5e5223..d48b24e54366f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2002,7 +2002,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, fs_devices = srcdev->fs_devices; list_del_rcu(&srcdev->dev_list); - list_del_rcu(&srcdev->dev_alloc_list); + list_del(&srcdev->dev_alloc_list); fs_devices->num_devices--; if (srcdev->missing) fs_devices->missing_devices--; From 56a0e706fcf870270878d6d72b71092ae42d229c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 30 Oct 2017 11:14:38 -0600 Subject: [PATCH 007/406] Btrfs: bail out gracefully rather than BUG_ON If a file's DIR_ITEM key is invalid (due to memory errors) and gets written to disk, a future lookup_path can end up with kernel panic due to BUG_ON(). This gets rid of the BUG_ON(), meanwhile output the corrupted key and return ENOENT if it's invalid. Signed-off-by: Liu Bo Reported-by: Guillaume Bouchard Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8525a44a607ea..1131db7a0b287 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5445,6 +5445,14 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, goto out_err; btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); + if (location->type != BTRFS_INODE_ITEM_KEY && + location->type != BTRFS_ROOT_ITEM_KEY) { + btrfs_warn(root->fs_info, +"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", + __func__, name, btrfs_ino(BTRFS_I(dir)), + location->objectid, location->type, location->offset); + goto out_err; + } out: btrfs_free_path(path); return ret; @@ -5761,8 +5769,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return inode; } - BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); - index = srcu_read_lock(&fs_info->subvol_srcu); ret = fixup_tree_root_location(fs_info, dir, dentry, &location, &sub_root); From 9271c0ca573e02a360b636ecd8cb408852f4e9f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 8 Nov 2017 17:25:04 +0200 Subject: [PATCH 008/406] drm/edid: Don't send non-zero YQ in AVI infoframe for HDMI 1.x sinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently some sinks look at the YQ bits even when receiving RGB, and they get somehow confused when they see a non-zero YQ value. So we can't just blindly follow CEA-861-F and set YQ to match the RGB range. Unfortunately there is no good way to tell whether the sink designer claims to have read CEA-861-F. The CEA extension block revision number has generally been stuck at 3 since forever, and even a very recently manufactured sink might be based on an old design so the manufacturing date doesn't seem like something we can use. In lieu of better information let's follow CEA-861-F only for HDMI 2.0 sinks, since HDMI 2.0 is based on CEA-861-F. For HDMI 1.x sinks we'll always set YQ=0. The alternative would of course be to always set YQ=0. And if we ever encounter a HDMI 2.0+ sink with this bug that's what we'll probably have to do. Cc: stable@vger.kernel.org Cc: Jani Nikula Cc: Eric Anholt Cc: Neil Kownacki Reported-by: Neil Kownacki Tested-by: Neil Kownacki Fixes: fcc8a22cc905 ("drm/edid: Set YQ bits in the AVI infoframe according to CEA-861-F") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101639 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171108152504.12596-1-ville.syrjala@linux.intel.com Acked-by: Eric Anholt --- drivers/gpu/drm/drm_edid.c | 12 ++++++++++-- drivers/gpu/drm/i915/intel_hdmi.c | 3 ++- drivers/gpu/drm/vc4/vc4_hdmi.c | 3 ++- include/drm/drm_edid.h | 3 ++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 6bb6337be920c..fc7946eb6665c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4809,7 +4809,8 @@ void drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, enum hdmi_quantization_range rgb_quant_range, - bool rgb_quant_range_selectable) + bool rgb_quant_range_selectable, + bool is_hdmi2_sink) { /* * CEA-861: @@ -4833,8 +4834,15 @@ drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, * YQ-field to match the RGB Quantization Range being transmitted * (e.g., when Limited Range RGB, set YQ=0 or when Full Range RGB, * set YQ=1) and the Sink shall ignore the YQ-field." + * + * Unfortunate certain sinks (eg. VIZ Model 67/E261VA) get confused + * by non-zero YQ when receiving RGB. There doesn't seem to be any + * good way to tell which version of CEA-861 the sink supports, so + * we limit non-zero YQ to HDMI 2.0 sinks only as HDMI 2.0 is based + * on on CEA-861-F. */ - if (rgb_quant_range == HDMI_QUANTIZATION_RANGE_LIMITED) + if (!is_hdmi2_sink || + rgb_quant_range == HDMI_QUANTIZATION_RANGE_LIMITED) frame->ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; else diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e8abea7594ec4..3fed1d3ecdedf 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -481,7 +481,8 @@ static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder, crtc_state->limited_color_range ? HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL, - intel_hdmi->rgb_quant_range_selectable); + intel_hdmi->rgb_quant_range_selectable, + is_hdmi2_sink); /* TODO: handle pixel repetition for YCBCR420 outputs */ intel_write_infoframe(encoder, crtc_state, &frame); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 937da8dd65b8b..8f71157a2b063 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -433,7 +433,8 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder) vc4_encoder->limited_rgb_range ? HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL, - vc4_encoder->rgb_range_selectable); + vc4_encoder->rgb_range_selectable, + false); vc4_hdmi_write_infoframe(encoder, &frame); } diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 1e1908a6b1d66..a992434ded999 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -360,7 +360,8 @@ void drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, enum hdmi_quantization_range rgb_quant_range, - bool rgb_quant_range_selectable); + bool rgb_quant_range_selectable, + bool is_hdmi2_sink); /** * drm_eld_mnl - Get ELD monitor name length in bytes. From 53d1cd6b125fb9d69303516a1179ebc3b72f797a Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Tue, 7 Nov 2017 15:17:55 +0530 Subject: [PATCH 009/406] cpupowerutils: bench - Fix cpu online check cpupower_is_cpu_online was incorrectly checking for 0. This patch fixes this by checking for 1 when the cpu is online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan --- tools/power/cpupower/bench/system.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index c25a74ae51bae..2bb3eef7d5c1f 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -61,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpupower_is_cpu_online(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 1) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; From dbdc468f35ee827cab2753caa1c660bdb832243a Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Wed, 15 Nov 2017 14:10:02 +0530 Subject: [PATCH 010/406] cpupower : Fix cpupower working when cpu0 is offline cpuidle_monitor used to assume that cpu0 is always online which is not a valid assumption on POWER machines. This patch fixes this by getting the cpu on which the current thread is running, instead of always using cpu0 for monitoring which may not be online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 1b5da0066ebf9..5b3205f162174 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -130,15 +130,18 @@ static struct cpuidle_monitor *cpuidle_register(void) { int num; char *tmp; + int this_cpu; + + this_cpu = sched_getcpu(); /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = cpuidle_state_name(0, num); + tmp = cpuidle_state_name(this_cpu, num); if (tmp == NULL) continue; @@ -146,7 +149,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = cpuidle_state_desc(0, num); + tmp = cpuidle_state_desc(this_cpu, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); From f48bf66b662e7acd6a32dbc28c4fa38931f8f0a6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 3 Nov 2017 22:26:44 +0000 Subject: [PATCH 011/406] Btrfs: move definition of the function btrfs_find_new_delalloc_bytes Move the definition of the function btrfs_find_new_delalloc_bytes() closer to the function btrfs_dirty_pages(), because in a future commit it will be used exclusively by btrfs_dirty_pages(). This just moves the function's definition, with no functional changes at all. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 82 ++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f80254d82f409..47e6ebad78c36 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -477,6 +477,47 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } +static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, + const u64 start, + const u64 len, + struct extent_state **cached_state) +{ + u64 search_start = start; + const u64 end = start + len - 1; + + while (search_start < end) { + const u64 search_len = end - search_start + 1; + struct extent_map *em; + u64 em_len; + int ret = 0; + + em = btrfs_get_extent(inode, NULL, 0, search_start, + search_len, 0); + if (IS_ERR(em)) + return PTR_ERR(em); + + if (em->block_start != EXTENT_MAP_HOLE) + goto next; + + em_len = em->len; + if (em->start < search_start) + em_len -= search_start - em->start; + if (em_len > search_len) + em_len = search_len; + + ret = set_extent_bit(&inode->io_tree, search_start, + search_start + em_len - 1, + EXTENT_DELALLOC_NEW, + NULL, cached_state, GFP_NOFS); +next: + search_start = extent_map_end(em); + free_extent_map(em); + if (ret) + return ret; + } + return 0; +} + /* * after copy_from_user, pages need to be dirtied and we need to make * sure holes are created between the current EOF and the start of @@ -1404,47 +1445,6 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, } -static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, - const u64 start, - const u64 len, - struct extent_state **cached_state) -{ - u64 search_start = start; - const u64 end = start + len - 1; - - while (search_start < end) { - const u64 search_len = end - search_start + 1; - struct extent_map *em; - u64 em_len; - int ret = 0; - - em = btrfs_get_extent(inode, NULL, 0, search_start, - search_len, 0); - if (IS_ERR(em)) - return PTR_ERR(em); - - if (em->block_start != EXTENT_MAP_HOLE) - goto next; - - em_len = em->len; - if (em->start < search_start) - em_len -= search_start - em->start; - if (em_len > search_len) - em_len = search_len; - - ret = set_extent_bit(&inode->io_tree, search_start, - search_start + em_len - 1, - EXTENT_DELALLOC_NEW, - NULL, cached_state, GFP_NOFS); -next: - search_start = extent_map_end(em); - free_extent_map(em); - if (ret) - return ret; - } - return 0; -} - /* * This function locks the extent and properly waits for data=ordered extents * to finish before allowing the pages to be modified if need. From e3b8a4858566a6cc25422fbfdfdd760b13b79280 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 4 Nov 2017 00:16:59 +0000 Subject: [PATCH 012/406] Btrfs: fix reported number of inode blocks after buffered append writes The patch from commit a7e3b975a0f9 ("Btrfs: fix reported number of inode blocks") introduced a regression where if we do a buffered write starting at position equal to or greater than the file's size and then stat(2) the file before writeback is triggered, the number of used blocks does not change (unless there's a prealloc/unwritten extent). Example: $ xfs_io -f -c "pwrite -S 0xab 0 64K" foobar $ du -h foobar 0 foobar $ sync $ du -h foobar 64K foobar The first version of that patch didn't had this regression and the second version, which was the one committed, was made only to address some performance regression detected by the intel test robots using fs_mark. This fixes the regression by setting the new delaloc bit in the range, and doing it at btrfs_dirty_pages() while setting the regular dealloc bit as well, so that this way we set both bits at once avoiding navigation of the inode's io tree twice. Doing it at btrfs_dirty_pages() is also the most meaninful place, as we should set the new dellaloc bit when if we set the delalloc bit, which happens only if we copied bytes into the pages at __btrfs_buffered_write(). This was making some of LTP's du tests fail, which can be quickly run using a command line like the following: $ ./runltp -q -p -l /ltp.log -f commands -s du -d /mnt Fixes: a7e3b975a0f9 ("Btrfs: fix reported number of inode blocks") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent_io.h | 5 ++-- fs/btrfs/file.c | 43 +++++++++++++++++++------------- fs/btrfs/inode.c | 9 ++++--- fs/btrfs/relocation.c | 3 ++- fs/btrfs/tests/extent-io-tests.c | 6 ++--- fs/btrfs/tests/inode-tests.c | 12 ++++----- 7 files changed, 46 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f7df5536ab61e..72dcbf19f6cef 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3180,6 +3180,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, int nr); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + unsigned int extra_bits, struct extent_state **cached_state, int dedupe); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, struct btrfs_root *new_root, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index d8b27af7101e6..2ef824b58e3c7 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -365,10 +365,11 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state); static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached_state) + u64 end, unsigned int extra_bits, + struct extent_state **cached_state) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_UPTODATE, + EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits, NULL, cached_state, GFP_NOFS); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 47e6ebad78c36..2de7b4f4ca3c7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -538,14 +538,34 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, u64 end_of_last_block; u64 end_pos = pos + write_bytes; loff_t isize = i_size_read(inode); + unsigned int extra_bits = 0; start_pos = pos & ~((u64) fs_info->sectorsize - 1); num_bytes = round_up(write_bytes + pos - start_pos, fs_info->sectorsize); end_of_last_block = start_pos + num_bytes - 1; + + if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (start_pos >= isize && + !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { + /* + * There can't be any extents following eof in this case + * so just set the delalloc new bit for the range + * directly. + */ + extra_bits |= EXTENT_DELALLOC_NEW; + } else { + err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode), + start_pos, + num_bytes, cached); + if (err) + return err; + } + } + err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, - cached, 0); + extra_bits, cached, 0); if (err) return err; @@ -1473,10 +1493,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, + round_up(pos + write_bytes - start_pos, fs_info->sectorsize) - 1; - if (start_pos < inode->vfs_inode.i_size || - (inode->flags & BTRFS_INODE_PREALLOC)) { + if (start_pos < inode->vfs_inode.i_size) { struct btrfs_ordered_extent *ordered; - unsigned int clear_bits; lock_extent_bits(&inode->io_tree, start_pos, last_pos, cached_state); @@ -1498,19 +1516,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } if (ordered) btrfs_put_ordered_extent(ordered); - ret = btrfs_find_new_delalloc_bytes(inode, start_pos, - last_pos - start_pos + 1, - cached_state); - clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG; - if (ret) - clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED; - clear_extent_bit(&inode->io_tree, start_pos, - last_pos, clear_bits, - (clear_bits & EXTENT_LOCKED) ? 1 : 0, - 0, cached_state, GFP_NOFS); - if (ret) - return ret; + clear_extent_bit(&inode->io_tree, start_pos, last_pos, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, + 0, 0, cached_state, GFP_NOFS); *lockstart = start_pos; *lockend = last_pos; ret = 1; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1131db7a0b287..993061f83067a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2032,11 +2032,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, } int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + unsigned int extra_bits, struct extent_state **cached_state, int dedupe) { WARN_ON((end & (PAGE_SIZE - 1)) == 0); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, - cached_state); + extra_bits, cached_state); } /* see btrfs_writepage_start_hook for details on why this is required */ @@ -2097,7 +2098,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, + btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state, 0); ClearPageChecked(page); set_page_dirty(page); @@ -4797,7 +4798,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, block_start, block_end, + ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0, &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, block_start, block_end, @@ -9163,7 +9164,7 @@ int btrfs_page_mkwrite(struct vm_fault *vmf) EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, page_start, end, + ret = btrfs_set_extent_delalloc(inode, page_start, end, 0, &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, page_start, page_end, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4cf2eb67eba6c..f0c3f00e97cbe 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3268,7 +3268,8 @@ static int relocate_file_extent_cluster(struct inode *inode, nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end, NULL, 0); + btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL, + 0); set_page_dirty(page); unlock_extent(&BTRFS_I(inode)->io_tree, diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index d06b1c931d05b..2e7f64a3b22b7 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -114,7 +114,7 @@ static int test_find_delalloc(u32 sectorsize) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL); + set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL); start = 0; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -145,7 +145,7 @@ static int test_find_delalloc(u32 sectorsize) test_msg("Couldn't find the locked page\n"); goto out_bits; } - set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL); + set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -200,7 +200,7 @@ static int test_find_delalloc(u32 sectorsize) * * We are re-using our test_start from above since it works out well. */ - set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL); + set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index f797642c013da..30affb60da514 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -968,7 +968,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) btrfs_test_inode_set_ops(inode); /* [BTRFS_MAX_EXTENT_SIZE] */ - ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, + ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); @@ -984,7 +984,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, - NULL, 0); + 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1018,7 +1018,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, - NULL, 0); + 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1036,7 +1036,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, - NULL, 0); + 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1053,7 +1053,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) */ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + sectorsize, - BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1089,7 +1089,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) */ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + sectorsize, - BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; From 51c4b8bba674cfd2260d173602c4dac08e4c3a99 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:11:30 +0200 Subject: [PATCH 013/406] KVM: x86: pvclock: Handle first-time write to pvclock-page contains random junk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When guest passes KVM it's pvclock-page GPA via WRMSR to MSR_KVM_SYSTEM_TIME / MSR_KVM_SYSTEM_TIME_NEW, KVM don't initialize pvclock-page to some start-values. It just requests a clock-update which will happen before entering to guest. The clock-update logic will call kvm_setup_pvclock_page() to update the pvclock-page with info. However, kvm_setup_pvclock_page() *wrongly* assumes that the version-field is initialized to an even number. This is wrong because at first-time write, field could be any-value. Fix simply makes sure that if first-time version-field is odd, increment it once more to make it even and only then start standard logic. This follows same logic as done in other pvclock shared-pages (See kvm_write_wall_clock() and record_steal_time()). Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 34c85aa2e2d1d..1d492b3660d51 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1830,6 +1830,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) */ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + if (guest_hv_clock.version & 1) + ++guest_hv_clock.version; /* first time write, random junk */ + vcpu->hv_clock.version = guest_hv_clock.version + 1; kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, From ac9b305caa0df6f5b75d294e4b86c1027648991e Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 6 Nov 2017 16:15:10 +0200 Subject: [PATCH 014/406] KVM: nVMX/nSVM: Don't intercept #UD when running L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running L2, #UD should be intercepted by L1 or just forwarded directly to L2. It should not reach L0 x86 emulator. Therefore, set intercept for #UD only based on L1 exception-bitmap. Also add WARN_ON_ONCE() on L0 #UD intercept handlers to make sure it is never reached while running L2. This improves commit ae1f57670703 ("KVM: nVMX: Do not emulate #UD while in guest mode") by removing an unnecessary exit from L2 to L0 on #UD when L1 doesn't intercept it. In addition, SVM L0 #UD intercept handler doesn't handle correctly the case it is raised from L2. In this case, it should forward the #UD to guest instead of x86 emulator. As done in VMX #UD intercept handler. This commit fixes this issue as-well. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 9 ++++++++- arch/x86/kvm/vmx.c | 9 ++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 59e13a79c2e3e..1bf7c09d97e61 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; + u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; + /* No need to intercept #UD if L1 doesn't intercept it */ + h_intercept_exceptions = + h->intercept_exceptions & ~(1U << UD_VECTOR); + c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = + h_intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2196,6 +2202,7 @@ static int ud_interception(struct vcpu_svm *svm) { int er; + WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 714a0673ec3cb..d319e2666ad58 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1887,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1905,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; + else + eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -5915,10 +5917,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - if (is_guest_mode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } + WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); From 61cb57c9ed631c95b54f8e9090c89d18b3695b3c Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:32 +0200 Subject: [PATCH 015/406] KVM: x86: Exit to user-mode on #UD intercept when emulator requires MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instruction emulation after trapping a #UD exception can result in an MMIO access, for example when emulating a MOVBE on a processor that doesn't support the instruction. In this case, the #UD vmexit handler must exit to user mode, but there wasn't any code to do so. Add it for both VMX and SVM. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1bf7c09d97e61..eb714f1cdf7ee 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2204,6 +2204,8 @@ static int ud_interception(struct vcpu_svm *svm) WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d319e2666ad58..65f1f06f6aaa7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5919,6 +5919,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_invalid_opcode(intr_info)) { WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; From 1f4dcb3b213235e642088709a1c54964d23365e9 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:33 +0200 Subject: [PATCH 016/406] KVM: x86: emulator: Return to user-mode on L1 CPL=0 emulation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On this case, handle_emulation_failure() fills kvm_run with internal-error information which it expects to be delivered to user-mode for further processing. However, the code reports a wrong return-value which makes KVM to never return to user-mode on this scenario. Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to userspace") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d492b3660d51..e5a7c53a19c61 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5433,7 +5433,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); From 9b8ae63798cb97e785a667ff27e43fa6220cb734 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:34 +0200 Subject: [PATCH 017/406] KVM: x86: Don't re-execute instruction when not passing CR2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of instruction-decode failure or emulation failure, x86_emulate_instruction() will call reexecute_instruction() which will attempt to use the cr2 value passed to x86_emulate_instruction(). However, when x86_emulate_instruction() is called from emulate_instruction(), cr2 is not passed (passed as 0) and therefore it doesn't make sense to execute reexecute_instruction() logic at all. Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bfb99770c341..977de5fb968be 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1161,7 +1161,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 65f1f06f6aaa7..6e4a0f822766e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6603,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; From 3853be2603191829b442b64dac6ae8ba0c027bf9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:47 -0800 Subject: [PATCH 018/406] KVM: X86: Fix operand/address-size during instruction decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pedro reported: During tests that we conducted on KVM, we noticed that executing a "PUSH %ES" instruction under KVM produces different results on both memory and the SP register depending on whether EPT support is enabled. With EPT the SP is reduced by 4 bytes (and the written value is 0-padded) but without EPT support it is only reduced by 2 bytes. The difference can be observed when the CS.DB field is 1 (32-bit) but not when it's 0 (16-bit). The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D also should be respected instead of just default operand/address-size/66H prefix/67H prefix during instruction decoding. This patch fixes it by also adjusting operand/address-size according to CS.D. Reported-by: Pedro Fonseca Tested-by: Pedro Fonseca Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Pedro Fonseca Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8079d141792af..b4a87debbb4b3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5000,6 +5000,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -5018,6 +5020,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; From f1b026a3310a441f504640dd3d9765eb533386b8 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:48 -0800 Subject: [PATCH 019/406] KVM: nVMX: Validate the IA32_BNDCFGS on nested VM-entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the SDM, if the "load IA32_BNDCFGS" VM-entry controls is 1, the following checks are performed on the field for the IA32_BNDCFGS MSR: - Bits reserved in the IA32_BNDCFGS MSR must be 0. - The linear address in bits 63:12 must be canonical. Reviewed-by: Konrad Rzeszutek Wilk Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6e4a0f822766e..707aaa954b3dd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10876,6 +10876,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 1; } + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && + (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || + (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + return 1; + return 0; } From 5af4157388adad82c339e3742fb6b67840721347 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:49 -0800 Subject: [PATCH 020/406] KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4f350c6dbcb (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly) can result in L1(run kvm-unit-tests/run_tests.sh vmx_controls in L1) null pointer deference and also L0 calltrace when EPT=0 on both L0 and L1. In L1: BUG: unable to handle kernel paging request at ffffffffc015bf8f IP: vmx_vcpu_run+0x202/0x510 [kvm_intel] PGD 146e13067 P4D 146e13067 PUD 146e15067 PMD 3d2686067 PTE 3d4af9161 Oops: 0003 [#1] PREEMPT SMP CPU: 2 PID: 1798 Comm: qemu-system-x86 Not tainted 4.14.0-rc4+ #6 RIP: 0010:vmx_vcpu_run+0x202/0x510 [kvm_intel] Call Trace: WARNING: kernel stack frame pointer at ffffb86f4988bc18 in qemu-system-x86:1798 has bad value 0000000000000002 In L0: -----------[ cut here ]------------ WARNING: CPU: 6 PID: 4460 at /home/kernel/linux/arch/x86/kvm//vmx.c:9845 vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] CPU: 6 PID: 4460 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc7+ #25 RIP: 0010:vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] Call Trace: paging64_page_fault+0x500/0xde0 [kvm] ? paging32_gva_to_gpa_nested+0x120/0x120 [kvm] ? nonpaging_page_fault+0x3b0/0x3b0 [kvm] ? __asan_storeN+0x12/0x20 ? paging64_gva_to_gpa+0xb0/0x120 [kvm] ? paging64_walk_addr_generic+0x11a0/0x11a0 [kvm] ? lock_acquire+0x2c0/0x2c0 ? vmx_read_guest_seg_ar+0x97/0x100 [kvm_intel] ? vmx_get_segment+0x2a6/0x310 [kvm_intel] ? sched_clock+0x1f/0x30 ? check_chain_key+0x137/0x1e0 ? __lock_acquire+0x83c/0x2420 ? kvm_multiple_exception+0xf2/0x220 [kvm] ? debug_check_no_locks_freed+0x240/0x240 ? debug_smp_processor_id+0x17/0x20 ? __lock_is_held+0x9e/0x100 kvm_mmu_page_fault+0x90/0x180 [kvm] kvm_handle_page_fault+0x15c/0x310 [kvm] ? __lock_is_held+0x9e/0x100 handle_exception+0x3c7/0x4d0 [kvm_intel] vmx_handle_exit+0x103/0x1010 [kvm_intel] ? kvm_arch_vcpu_ioctl_run+0x1628/0x2e20 [kvm] The commit avoids to load host state of vmcs12 as vmcs01's guest state since vmcs12 is not modified (except for the VM-instruction error field) if the checking of vmcs control area fails. However, the mmu context is switched to nested mmu in prepare_vmcs02() and it will not be reloaded since load_vmcs12_host_state() is skipped when nested VMLAUNCH/VMRESUME fails. This patch fixes it by reloading mmu context when nested VMLAUNCH/VMRESUME fails. Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 707aaa954b3dd..10474d26a0002 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11330,6 +11330,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_clear_interrupt_queue(vcpu); } +static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 entry_failure_code; + + nested_ept_uninit_mmu_context(vcpu); + + /* + * Only PDPTE load can fail as the value of cr3 was checked on entry and + * couldn't have changed. + */ + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); + + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; +} + /* * A part of what we need to when the nested L2 guest exits and we want to * run its L1 parent, is to reset L1's guest state to the host state specified @@ -11343,7 +11361,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; - u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -11370,17 +11387,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); vmx_set_cr4(vcpu, vmcs12->host_cr4); - nested_ept_uninit_mmu_context(vcpu); - - /* - * Only PDPTE load can fail as the value of cr3 was checked on entry and - * couldn't have changed. - */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); - - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; + load_vmcs12_mmu_host_state(vcpu, vmcs12); if (enable_vpid) { /* @@ -11610,6 +11617,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, * accordingly. */ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + load_vmcs12_mmu_host_state(vcpu, vmcs12); + /* * The emulated instruction was already skipped in * nested_vmx_run, but the updated RIP was never From 4d772cb85f64c16eca00177089ecb3cd5d292120 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Nov 2017 18:04:05 +0100 Subject: [PATCH 021/406] KVM: x86: fix em_fxstor() sleeping while in atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") optimize the stack size, but introduced a guest memory access which might sleep while in atomic. Fix it by introducing, again, a second fxregs_state. Try to avoid large stacks by using noinline. Add some helpful comments. Reported by syzbot: in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109 2 locks held by syzkaller879109/2909: #0: (&vcpu->mutex){+.+.}, at: [] vcpu_load+0x1c/0x70 arch/x86/kvm/../../../virt/kvm/kvm_main.c:154 #1: (&kvm->srcu){....}, at: [] vcpu_enter_guest arch/x86/kvm/x86.c:6983 [inline] #1: (&kvm->srcu){....}, at: [] vcpu_run arch/x86/kvm/x86.c:7061 [inline] #1: (&kvm->srcu){....}, at: [] kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222 CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted 4.13.0-rc4-next-20170811 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014 __might_sleep+0x95/0x190 kernel/sched/core.c:5967 __might_fault+0xab/0x1d0 mm/memory.c:4383 __copy_from_user include/linux/uaccess.h:71 [inline] __kvm_read_guest_page+0x58/0xa0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771 kvm_vcpu_read_guest_page+0x44/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791 kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407 kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466 segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819 em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022 x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471 x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698 kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854 handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400 vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718 vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline] vcpu_run arch/x86/kvm/x86.c:7061 [inline] kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222 kvm_vcpu_ioctl+0x64c/0x1010 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x437fc9 RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000 R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000 Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") Signed-off-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4a87debbb4b3..e7d04d0c8008d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) fxstate_size(ctxt)); } +/* + * FXRSTOR might restore XMM registers not provided by the guest. Fill + * in the host registers (via FXSAVE) instead, so they won't be modified. + * (preemption has to stay disabled until FXRSTOR). + * + * Use noinline to keep the stack for other functions called by callers small. + */ +static noinline int fxregs_fixup(struct fxregs_state *fx_state, + const size_t used_size) +{ + struct fxregs_state fx_tmp; + int rc; + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp)); + memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size, + __fxstate_size(16) - used_size); + + return rc; +} + static int em_fxrstor(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; @@ -4024,19 +4044,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + size = fxstate_size(ctxt); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->get_fpu(ctxt); - size = fxstate_size(ctxt); if (size < __fxstate_size(16)) { - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) goto out; } - rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); - if (rc != X86EMUL_CONTINUE) - goto out; - if (fx_state.mxcsr >> 16) { rc = emulate_gp(ctxt, 0); goto out; From fab0aa3b776f0a3af1db1f50e04f1884015f9082 Mon Sep 17 00:00:00 2001 From: Eyal Moscovici Date: Wed, 8 Nov 2017 14:32:08 +0200 Subject: [PATCH 022/406] KVM: x86: Allow suppressing prints on RDMSR/WRMSR of unhandled MSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some guests use these unhandled MSRs very frequently. This cause dmesg to be populated with lots of aggregated messages on usage of ignored MSRs. As ignore_msrs=true means that the user is well-aware his guest use ignored MSRs, allow to also disable the prints on their usage. An example of such guest is ESXi which tends to access a lot to MSR 0x34 (MSR_SMI_COUNT) very frequently. In addition, we have observed this to cause unnecessary delays to guest execution. Such an example is ESXi which experience networking delays in it's guests (L2 guests) because of these prints (even when prints are rate-limited). This can easily be reproduced by pinging from one L2 guest to another. Once in a while, a peak in ping RTT will be observed. Removing these unhandled MSR prints solves the issue. Because these prints can help diagnose issues with guests, this commit only suppress them by a module parameter instead of removing them from code entirely. Signed-off-by: Eyal Moscovici Reviewed-by: Liran Alon Reviewed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Konrad Rzeszutek Wilk [Changed suppress_ignore_msrs_prints to report_ignored_msrs - Radim] Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5a7c53a19c61..0c5b141927b87 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +static bool __read_mostly report_ignored_msrs = true; +module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); + unsigned int min_timer_period_us = 500; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); @@ -2325,7 +2328,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. */ - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) @@ -2362,8 +2367,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr, data); return 1; } else { - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", - msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, + "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; } } @@ -2581,7 +2588,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->index); return 1; } else { - vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", + msr_info->index); msr_info->data = 0; } break; From 6ea6e84309ca7e0e850b3083e6b09344ee15c290 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 10 Nov 2017 10:49:38 +0100 Subject: [PATCH 023/406] KVM: x86: inject exceptions produced by x86_decode_insn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes, a processor might execute an instruction while another processor is updating the page tables for that instruction's code page, but before the TLB shootdown completes. The interesting case happens if the page is in the TLB. In general, the processor will succeed in executing the instruction and nothing bad happens. However, what if the instruction is an MMIO access? If *that* happens, KVM invokes the emulator, and the emulator gets the updated page tables. If the update side had marked the code page as non present, the page table walk then will fail and so will x86_decode_insn. Unfortunately, even though kvm_fetch_guest_virt is correctly returning X86EMUL_PROPAGATE_FAULT, x86_decode_insn's caller treats the failure as a fatal error if the instruction cannot simply be reexecuted (as is the case for MMIO). And this in fact happened sometimes when rebooting Windows 2012r2 guests. Just checking ctxt->have_exception and injecting the exception if true is enough to fix the case. Thanks to Eduardo Habkost for helping in the debugging of this issue. Reported-by: Yanan Fu Cc: Eduardo Habkost Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c5b141927b87..4552427105f69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5734,6 +5734,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; + if (ctxt->have_exception && inject_emulated_exception(vcpu)) + return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; return handle_emulation_failure(vcpu); From 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:29 +0200 Subject: [PATCH 024/406] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM uses ioapic_handled_vectors to track vectors that need to notify the IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an interrupt with old configuration is pending or running and ioapic_handled_vectors only remembers the newest configuration; thus EOI from the old interrupt is not delievered to the IOAPIC. A previous commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") addressed this issue by adding pending edge-triggered interrupts to ioapic_handled_vectors, fixing this race for edge-triggered interrupts. The commit explicitly ignored level-triggered interrupts, but this race applies to them as well: 1) IOAPIC sends a level triggered interrupt vector to VCPU0 2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC to route the vector to VCPU1. The reconfiguration rewrites only the upper 32 bits of the IOREDTBLn register. (Causes KVM to update ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) 3) VCPU0 sends EOI for the vector, but it's not delievered to the IOAPIC because the ioapic_handled_vectors doesn't include the vector. 4) New interrupts are not delievered to VCPU1 because remote_irr bit is set forever. Therefore, the correct behavior is to add all pending and running interrupts to ioapic_handled_vectors. This commit introduces a slight performance hit similar to commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") for the rare case that the vector is reused by a non-IOAPIC source on VCPU0. We prefer to keep solution simple and not handle this case just as the original commit does. Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index bdff437acbcb7..ae0a7dc318b26 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } From da3fe7bdfada217bf02ecd0477fcdb55da50944c Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:30 +0200 Subject: [PATCH 025/406] KVM: x86: ioapic: Don't fire level irq when Remote IRR set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid firing a level-triggered interrupt that has the Remote IRR bit set, because that means that some CPU is already processing it. The Remote IRR bit will be cleared after an EOI and the interrupt will refire if the irq line is still asserted. This behavior is aligned with QEMU's IOAPIC implementation that was introduced by commit f99b86b94987 ("x86: ioapic: ignore level irq during processing") in QEMU. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index ae0a7dc318b26..5c92311392437 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -323,7 +323,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) struct kvm_lapic_irq irqe; int ret; - if (entry->fields.mask) + if (entry->fields.mask || + (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG && + entry->fields.remote_irr)) return -1; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " From 7d2253684dd10eb800ee1898ad7904044ae88ed6 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:31 +0200 Subject: [PATCH 026/406] KVM: x86: ioapic: Remove redundant check for Remote IRR in ioapic_set_irq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remote IRR for level-triggered interrupts was previously checked in ioapic_set_irq, but since we now have a check in ioapic_service we can remove the redundant check from ioapic_set_irq. This commit doesn't change semantics. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5c92311392437..6df150eaaa78f 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, old_irr = ioapic->irr; ioapic->irr |= mask; - if (edge) + if (edge) { ioapic->irr_delivered &= ~mask; - if ((edge && old_irr == ioapic->irr) || - (!edge && entry.fields.remote_irr)) { - ret = 0; - goto out; + if (old_irr == ioapic->irr) { + ret = 0; + goto out; + } } ret = ioapic_service(ioapic, irq, line_status); From a8bfec2930525808c01f038825d1df3904638631 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:32 +0200 Subject: [PATCH 027/406] KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for IOAPICs without an EOI register. They simulate the EOI message manually by changing the trigger mode to edge and then back to level, with the entry being masked during this. QEMU implements this feature in commit ed1263c363c9 ("ioapic: clear remote irr bit for edge-triggered interrupts") As a side effect, this commit removes an incorrect behavior where Remote IRR was cleared when the redirection table entry was rewritten. This is not consistent with the manual and also opens an opportunity for a strange behavior when a redirection table entry is modified from an interrupt handler that handles the same entry: The modification will clear the Remote IRR bit even though the interrupt handler is still running. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6df150eaaa78f..163d340ee5f82 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -304,8 +304,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); From b200dded0a6974a3b69599832b2203483920ab25 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:33 +0200 Subject: [PATCH 028/406] KVM: x86: ioapic: Preserve read-only values in the redirection table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb ("ioapic: keep RO bits for IOAPIC entry"). Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 163d340ee5f82..4e822ad363f37 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -276,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -298,6 +299,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -305,6 +309,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits &= ~0xffffffffULL; e->bits |= (u32) val; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; /* * Some OSes (Linux, Xen) assume that Remote IRR bit will From 917dc6068bc12a2dafffcf0e9d405ddb1b8780cb Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:07:43 +0200 Subject: [PATCH 029/406] KVM: nVMX: Fix vmx_check_nested_events() return value in case an event was reinjected to L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmx_check_nested_events() should return -EBUSY only in case there is a pending L1 event which requires a VMExit from L2 to L1 but such a VMExit is currently blocked. Such VMExits are blocked either because nested_run_pending=1 or an event was reinjected to L2. vmx_check_nested_events() should return 0 in case there are no pending L1 events which requires a VMExit from L2 to L1 or if a VMExit from L2 to L1 was done internally. However, upstream commit which introduced blocking in case an event was reinjected to L2 (commit acc9ab601327 ("KVM: nVMX: Fix pending events injection")) contains a bug: It returns -EBUSY even if there are no pending L1 events which requires VMExit from L2 to L1. This commit fix this issue. Fixes: acc9ab601327 ("KVM: nVMX: Fix pending events injection") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10474d26a0002..be4724b5d434c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11105,13 +11105,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; - - if (kvm_event_needs_reinjection(vcpu)) - return -EBUSY; + bool block_nested_events = + vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); if (vcpu->arch.exception.pending && nested_vmx_check_exception(vcpu, &exit_qual)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); vcpu->arch.exception.pending = false; @@ -11120,14 +11119,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); return 0; } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | @@ -11143,7 +11142,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && nested_exit_on_intr(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; From 50a671d4d15b859f447fa527191073019b6ce9cb Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Mon, 6 Nov 2017 11:44:23 -0600 Subject: [PATCH 030/406] KVM: x86: Fix CPUID function for word 6 (80000001_ECX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function for CPUID 80000001 ECX is set to 0xc0000001. Set it to 0x80000001. Signed-off-by: Janakarajan Natarajan Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Reviewed-by: Borislav Petkov Fixes: d6321d493319 ("KVM: x86: generalize guest_cpuid_has_ helpers") Signed-off-by: Radim Krčmář --- arch/x86/kvm/cpuid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3a65838..c2cea6651279f 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, - [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, From c4ad77e0d49b10b412a9fa7f47a3a23177870bc7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Nov 2017 14:23:59 +0100 Subject: [PATCH 031/406] KVM: vmx: use X86_CR4_UMIP and X86_FEATURE_UMIP These bits were not defined until now in common code, but they are now that the kernel supports UMIP too. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index be4724b5d434c..0d59dbe430c82 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9801,8 +9801,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */ - cr4_fixed1_update(bit(11), ecx, bit(2)); + cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); #undef cr4_fixed1_update } From 97f07697932e6faf2a708f7aef7cba8e6b0cab96 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Tue, 31 Oct 2017 18:37:54 +0800 Subject: [PATCH 032/406] bdi: convert bdi_debug_register to int Convert bdi_debug_register to int and then do error handle for it. Reviewed-by: Jan Kara Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- mm/backing-dev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 74b52dfd5852d..b5f940ce0143b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -113,11 +113,23 @@ static const struct file_operations bdi_debug_stats_fops = { .release = single_release, }; -static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) +static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) { + if (!bdi_debug_root) + return -ENOMEM; + bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); + if (!bdi->debug_dir) + return -ENOMEM; + bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, bdi, &bdi_debug_stats_fops); + if (!bdi->debug_stats) { + debugfs_remove(bdi->debug_dir); + return -ENOMEM; + } + + return 0; } static void bdi_debug_unregister(struct backing_dev_info *bdi) @@ -129,9 +141,10 @@ static void bdi_debug_unregister(struct backing_dev_info *bdi) static inline void bdi_debug_init(void) { } -static inline void bdi_debug_register(struct backing_dev_info *bdi, +static inline int bdi_debug_register(struct backing_dev_info *bdi, const char *name) { + return 0; } static inline void bdi_debug_unregister(struct backing_dev_info *bdi) { From a0747a859ef6d3cc5b6cd50eb694499b78dd0025 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Fri, 17 Nov 2017 23:06:04 +0800 Subject: [PATCH 033/406] bdi: add error handle for bdi_debug_register In order to make error handle more cleaner we call bdi_debug_register before set state to WB_registered, that we can avoid call bdi_unregister in release_bdi(). Reviewed-by: Jan Kara Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- mm/backing-dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b5f940ce0143b..84b2dc76f140e 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -882,10 +882,13 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (IS_ERR(dev)) return PTR_ERR(dev); + if (bdi_debug_register(bdi, dev_name(dev))) { + device_destroy(bdi_class, dev->devt); + return -ENOMEM; + } cgwb_bdi_register(bdi); bdi->dev = dev; - bdi_debug_register(bdi, dev_name(dev)); set_bit(WB_registered, &bdi->wb.state); spin_lock_bh(&bdi_lock); From 3a92168bc8a113098b44a95d499557a88bb387da Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Tue, 31 Oct 2017 18:38:59 +0800 Subject: [PATCH 034/406] block: add WARN_ON if bdi register fail device_add_disk need do more safety error handle, so this patch just add WARN_ON. Reviewed-by: Jan Kara Signed-off-by: weiping zhang Adapted for current series by me. Signed-off-by: Jens Axboe --- block/genhd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index c2223f12a8051..c143a22742381 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -671,10 +671,13 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; disk->flags |= GENHD_FL_NO_PART_SCAN; } else { + int ret; + /* Register BDI before referencing it from bdev */ disk_to_dev(disk)->devt = devt; - bdi_register_owner(disk->queue->backing_dev_info, - disk_to_dev(disk)); + ret = bdi_register_owner(disk->queue->backing_dev_info, + disk_to_dev(disk)); + WARN_ON(ret); blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } From 7fb526212f16fcec4e92121ea86dc28fba493177 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Nov 2017 17:43:38 -0800 Subject: [PATCH 035/406] block: genhd.c: fix message typo Fix typo in error message. Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/genhd.c b/block/genhd.c index c143a22742381..96a66f6717204 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1392,7 +1392,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (minors > DISK_MAX_PARTS) { printk(KERN_ERR - "block: can't allocated more than %d partitions\n", + "block: can't allocate more than %d partitions\n", DISK_MAX_PARTS); minors = DISK_MAX_PARTS; } From 1690102de5651bb85b23d5eeaff682a6b96d705b Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Sun, 19 Nov 2017 16:48:13 -0200 Subject: [PATCH 036/406] blktrace: Use blk_trace_bio_get_cgid inside blk_add_trace_bio We always pass in blk_trace_bio_get_cgid(q, bio) to blk_add_trace_bio(). Since both are readily available in the function already, kill the argument. Signed-off-by: Marcos Paulo de Souza Rewrote commit message. Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 206e0e2ace53d..c5987d4c5f23b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -872,7 +872,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, * **/ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what, int error, union kernfs_node_id *cgid) + u32 what, int error) { struct blk_trace *bt = q->blk_trace; @@ -880,22 +880,21 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, return; __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, what, error, 0, NULL, cgid); + bio_op(bio), bio->bi_opf, what, error, 0, NULL, + blk_trace_bio_get_cgid(q, bio)); } static void blk_add_trace_bio_bounce(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, struct request_queue *q, struct bio *bio, int error) { - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } static void blk_add_trace_bio_backmerge(void *ignore, @@ -903,8 +902,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); } static void blk_add_trace_bio_frontmerge(void *ignore, @@ -912,15 +910,13 @@ static void blk_add_trace_bio_frontmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); } static void blk_add_trace_bio_queue(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); } static void blk_add_trace_getrq(void *ignore, @@ -928,8 +924,7 @@ static void blk_add_trace_getrq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { struct blk_trace *bt = q->blk_trace; @@ -945,8 +940,7 @@ static void blk_add_trace_sleeprq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { struct blk_trace *bt = q->blk_trace; From 48832f8d58cfedb2f9bee11bbfbb657efb42e7e7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:20 +0300 Subject: [PATCH 037/406] nvme-fabrics: introduce init command check for a queue that is not alive When the fabrics queue is not alive and fully functional, no commands should be allowed to pass but connect (which moves the queue to a fully functional state). Any other command should be failed, with either temporary status BLK_STS_RESOUCE or permanent status BLK_STS_IOERR. This is shared across all fabrics, hence move the check to fabrics library. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.h | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/rdma.c | 32 ++++++-------------------------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 42232e731f19f..9ba614953607e 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -156,4 +156,34 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); +static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, + struct request *rq) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + + /* + * We cannot accept any other command until the connect command has + * completed, so only allow connect to pass. + */ + if (!blk_rq_is_passthrough(rq) || + cmd->common.opcode != nvme_fabrics_command || + cmd->fabrics.fctype != nvme_fabrics_type_connect) { + /* + * Reconnecting state means transport disruption, which can take + * a long time and even might fail permanently, fail fast to + * give upper layers a chance to failover. + * Deleting state means that the ctrl will never accept commands + * again, fail it permanently. + */ + if (ctrl->state == NVME_CTRL_RECONNECTING || + ctrl->state == NVME_CTRL_DELETING) { + nvme_req(rq)->status = NVME_SC_ABORT_REQ; + return BLK_STS_IOERR; + } + return BLK_STS_RESOURCE; /* try again later */ + } + + return BLK_STS_OK; +} + #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4f9bf2f815c39..2c597105a6bf3 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1591,31 +1591,11 @@ nvme_rdma_timeout(struct request *rq, bool reserved) * We cannot accept any other command until the Connect command has completed. */ static inline blk_status_t -nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq) -{ - if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { - struct nvme_command *cmd = nvme_req(rq)->cmd; - - if (!blk_rq_is_passthrough(rq) || - cmd->common.opcode != nvme_fabrics_command || - cmd->fabrics.fctype != nvme_fabrics_type_connect) { - /* - * reconnecting state means transport disruption, which - * can take a long time and even might fail permanently, - * fail fast to give upper layers a chance to failover. - * deleting state means that the ctrl will never accept - * commands again, fail it permanently. - */ - if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING || - queue->ctrl->ctrl.state == NVME_CTRL_DELETING) { - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; - } - return BLK_STS_RESOURCE; /* try again later */ - } - } - - return 0; +nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) +{ + if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -1634,7 +1614,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - ret = nvme_rdma_queue_is_ready(queue, rq); + ret = nvme_rdma_is_ready(queue, rq); if (unlikely(ret)) return ret; From 9e0ed16ab9a9aaf670b81c9cd05b5e50defed654 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:21 +0300 Subject: [PATCH 038/406] nvme-fc: check if queue is ready in queue_rq In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_FC_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7ab0be55c7d06..e0577bf33f45e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -32,6 +32,7 @@ enum nvme_fc_queue_flags { NVME_FC_Q_CONNECTED = (1 << 0), + NVME_FC_Q_LIVE = (1 << 1), }; #define NVMEFC_QUEUE_DELAY 3 /* ms units */ @@ -1927,6 +1928,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) return; + clear_bit(NVME_FC_Q_LIVE, &queue->flags); /* * Current implementation never disconnects a single queue. * It always terminates a whole association. So there is never @@ -1934,7 +1936,6 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); } static void @@ -2013,6 +2014,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -2320,6 +2323,14 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, return BLK_STS_RESOURCE; } +static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) @@ -2335,6 +2346,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, u32 data_len; blk_status_t ret; + ret = nvme_fc_is_ready(queue, rq); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, rq, sqe); if (ret) return ret; @@ -2727,6 +2742,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (ret) goto out_disconnect_admin_queue; + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); + /* * Check controller capabilities * From 9d7fab04b95e8c26014a9bfc1c943b8360b44c17 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:22 +0300 Subject: [PATCH 039/406] nvme-loop: check if queue is ready in queue_rq In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_LOOP_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 96d390416789b..1e21b286f2998 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -52,10 +52,15 @@ static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) return container_of(ctrl, struct nvme_loop_ctrl, ctrl); } +enum nvme_loop_queue_flags { + NVME_LOOP_Q_LIVE = 0, +}; + struct nvme_loop_queue { struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvme_loop_ctrl *ctrl; + unsigned long flags; }; static struct nvmet_port *nvmet_loop_port; @@ -144,6 +149,14 @@ nvme_loop_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } +static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -153,6 +166,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; + ret = nvme_loop_is_ready(queue, req); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, req, &iod->cmd); if (ret) return ret; @@ -267,6 +284,7 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); @@ -297,8 +315,10 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->ctrl.queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + } } static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) @@ -338,6 +358,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) return ret; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); } return 0; @@ -380,6 +401,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, From 8427bbc224863e14d905c87920d4005cb3e88ac3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 9 Nov 2017 01:12:03 -0500 Subject: [PATCH 040/406] nvme-pci: disable APST on Samsung SSD 960 EVO + ASUS PRIME B350M-A The NVMe device in question drops off the PCIe bus after system suspend. I've tried several approaches to workaround this issue, but none of them works: - NVME_QUIRK_DELAY_BEFORE_CHK_RDY - NVME_QUIRK_NO_DEEPEST_PS - Disable APST before controller shutdown - Delay between controller shutdown and system suspend - Explicitly set power state to 0 before controller shutdown Fortunately it's a desktop, so disable APST won't hurt the battery. Also, change the quirk function name to reflect it's for vendor combination quirks. BugLink: https://bugs.launchpad.net/bugs/1705748 Signed-off-by: Kai-Heng Feng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a11cfd4700892..8a15aa50b8e06 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2428,7 +2428,7 @@ static int nvme_dev_map(struct nvme_dev *dev) return -ENODEV; } -static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) +static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) { if (pdev->vendor == 0x144d && pdev->device == 0xa802) { /* @@ -2443,6 +2443,14 @@ static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) return NVME_QUIRK_NO_DEEPEST_PS; + } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { + /* + * Samsung SSD 960 EVO drops off the PCIe bus after system + * suspend on a Ryzen board, ASUS PRIME B350M-A. + */ + if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && + dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) + return NVME_QUIRK_NO_APST; } return 0; @@ -2482,7 +2490,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto unmap; - quirks |= check_dell_samsung_bug(pdev); + quirks |= check_vendor_combination_bug(pdev); result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); From 244a8fe40a09c218622eb9927b9090b0a9b73a1a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 17 Nov 2017 01:34:24 +0900 Subject: [PATCH 041/406] nvme-pci: avoid hmb desc array idx out-of-bound when hmmaxd set. hmb descriptor idx out-of-bound occurs in case of below conditions. preferred = 128MiB chunk_size = 4MiB hmmaxd = 1 Current code will not allow rmmod which will free hmb descriptors to be done successfully in above case. "descs[i]" will be set in for-loop without seeing any conditions related to "max_entries" after a single "descs" was allocated by (max_entries = 1) in this case. Added a condition into for-loop to check index of descriptors. Fixes: 044a9df1("nvme-pci: implement the HMB entry number and size limitations") Signed-off-by: Minwoo Im Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8a15aa50b8e06..58dbe684007bd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1787,7 +1787,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, if (!bufs) goto out_free_descs; - for (size = 0; size < preferred; size += len) { + for (size = 0; size < preferred && i < max_entries; size += len) { dma_addr_t dma_addr; len = min_t(u64, chunk_size, preferred - size); From 89c4aff6d4f71726f22e567f046dc1dd73c35de1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2017 14:26:27 +0000 Subject: [PATCH 042/406] nvme: fix spelling mistake: "requeing" -> "requeuing" Trivial fix to spelling mistake in dev_warn_ratelimited message text Signed-off-by: Colin Ian King Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 78d92151a9042..1218a9fca8466 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -131,7 +131,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, bio->bi_opf |= REQ_NVME_MPATH; ret = direct_make_request(bio); } else if (!list_empty_careful(&head->list)) { - dev_warn_ratelimited(dev, "no path available - requeing I/O\n"); + dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); spin_lock_irq(&head->requeue_lock); bio_list_add(&head->requeue_list, bio); From b0d61d586f09fd814a45a5d778fe0d6123f67c2a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Nov 2017 13:36:49 -0700 Subject: [PATCH 043/406] nvme: Fix NULL dereference on reservation request This fixes using the NULL 'head' before getting the reference. It is however possible the head will always be NULL, so this patch uses the struct nvme_ns to get the ns_id field. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 25da74d310d1b..a2ab4e440bead 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1449,19 +1449,19 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, int srcu_idx, ret; u8 data[16] = { 0, }; + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + return -EWOULDBLOCK; + put_unaligned_le64(key, &data[0]); put_unaligned_le64(sa_key, &data[8]); memset(&c, 0, sizeof(c)); c.common.opcode = op; - c.common.nsid = cpu_to_le32(head->ns_id); + c.common.nsid = cpu_to_le32(ns->head->ns_id); c.common.cdw10[0] = cpu_to_le32(cdw10); - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); - if (unlikely(!ns)) - ret = -EWOULDBLOCK; - else - ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); + ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); nvme_put_ns_from_disk(head, srcu_idx); return ret; } From 9941a862cc92e448df95709ff40a618964b25e33 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Nov 2017 13:36:50 -0700 Subject: [PATCH 044/406] nvme: Suppress static analyis warning The ns->head is always valid, so we don't need to check for NULL. Reported-by: Dan Carpenter Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a2ab4e440bead..f837d666cbd49 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2961,8 +2961,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) static void nvme_ns_remove(struct nvme_ns *ns) { - struct nvme_ns_head *head = ns->head; - if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -2980,15 +2978,14 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); nvme_mpath_clear_current_path(ns); - if (head) - list_del_rcu(&ns->siblings); + list_del_rcu(&ns->siblings); mutex_unlock(&ns->ctrl->subsys->lock); mutex_lock(&ns->ctrl->namespaces_mutex); list_del_init(&ns->list); mutex_unlock(&ns->ctrl->namespaces_mutex); - synchronize_srcu(&head->srcu); + synchronize_srcu(&ns->head->srcu); nvme_put_ns(ns); } From 619c62dcc62b957d17cccde2081cad527b020883 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 10 Nov 2017 15:38:45 -0800 Subject: [PATCH 045/406] nvmet-fc: correct ref counting error when deferred rcv used Whenever a cmd is received a reference is taken while looking up the queue. The reference is removed after the cmd is done as the iod is returned for reuse. The fod may be reused for a deferred (recevied but no job context) cmd. Existing code removes the reference only if the fod is not reused for another command. Given the fod may be used for one or more ios, although a reference was taken per io, it won't be matched on the frees. Remove the reference on every fod free. This pairs the references to each io. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 664d3013f68f3..5fd86039e3536 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -533,15 +533,15 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); + /* release the queue lookup reference on the completed IO */ + nvmet_fc_tgt_q_put(queue); + spin_lock_irqsave(&queue->qlock, flags); deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, struct nvmet_fc_defer_fcp_req, req_list); if (!deferfcp) { list_add_tail(&fod->fcp_list, &fod->queue->fod_list); spin_unlock_irqrestore(&queue->qlock, flags); - - /* Release reference taken at queue lookup and fod allocation */ - nvmet_fc_tgt_q_put(queue); return; } @@ -760,6 +760,9 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) tgtport->ops->fcp_req_release(&tgtport->fc_target_port, deferfcp->fcp_req); + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + kfree(deferfcp); spin_lock_irqsave(&queue->qlock, flags); From 38389ec84e835fa31a59b7dabb18343106a6d0d5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 16 Nov 2017 14:26:36 +0100 Subject: [PATCH 046/406] s390/topology: fix compile error in file arch/s390/kernel/smp.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1887aa07b676 ("s390/topology: add detection of dedicated vs shared CPUs") introduced following compiler error when CONFIG_SCHED_TOPOLOGY is not set. CC arch/s390/kernel/smp.o ... arch/s390/kernel/smp.c: In function ‘smp_start_secondary’: arch/s390/kernel/smp.c:812:6: error: implicit declaration of function ‘topology_cpu_dedicated’; did you mean ‘topology_cpu_init’? This patch fixes the compiler error by adding function topology_cpu_dedicated() to return false when this config option is not defined. Signed-off-by: Thomas Richter Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/topology.h | 1 + arch/s390/kernel/smp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 1807229b292f0..cca406fdbe51f 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -53,6 +53,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu); static inline void topology_init_early(void) { } static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline int topology_cpu_dedicated(int cpu_nr) { return 0; } static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cd4334e80b64c..b8c1a85bcf2de 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "entry.h" enum { From 3241d3eb7d9bb7a134bedf49db196cf98b58834d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Nov 2017 14:54:04 +0100 Subject: [PATCH 047/406] s390: rework __switch_to() to allow larger task_struct offsets If GCC_PLUGIN_RANDSTRUCT is enabled the members of task_struct will be shuffled around. The offsets of the "pid" and "stack" members within task_struct may not necessarily fit into 12 bits anymore, which causes compile errors within __switch_to, since instructions are used, which only have a 12 bit displacement field. Therefore rework __switch_to, to allow for larger offsets. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a316cd6999ad9..9e5f6cd8e4c2e 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -180,18 +180,17 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) */ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - lgr %r1,%r2 - aghi %r1,__TASK_thread # thread_struct of prev task - lg %r5,__TASK_stack(%r3) # start of kernel stack of next - stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev - lgr %r1,%r3 - aghi %r1,__TASK_thread # thread_struct of next task + lghi %r4,__TASK_stack + lghi %r1,__TASK_thread + lg %r5,0(%r4,%r3) # start of kernel stack of next + stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lgr %r15,%r5 aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack - lg %r15,__THREAD_ksp(%r1) # load kernel stack of next - mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next + aghi %r3,__TASK_pid + mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP bzr %r14 From de35089cc82c33f8be1dec5a0c3d7a05cf91720d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Nov 2017 09:50:40 +0100 Subject: [PATCH 048/406] s390/disassembler: remove confusing code When searching the opcode offset table within find_insn() the check "entry->opcode == 0" was intended to clarify that 1-byte opcodes, the first one being 0, are special. However there is no mnemonic for an illegal opcode starting with 0. Therefore there is also no opcode offset table entry that matches, which again means that the check never is true. Therefore just remove the confusing check, and add a comment which hopefully explains how this works. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 3be829721cf94..465d52b5e4708 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -396,9 +396,14 @@ struct s390_insn *find_insn(unsigned char *code) unsigned char opfrag; int i; + /* Search the opcode offset table to find an entry which + * matches the beginning of the opcode. If there is no match + * the last entry will be used, which is the default entry for + * unknown instructions as well as 1-byte opcode instructions. + */ for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { entry = &opcode_offset[i]; - if (entry->opcode == code[0] || entry->opcode == 0) + if (entry->opcode == code[0]) break; } From 0b0882672640ced4deeebf84da0b88b6389619c4 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 17 Nov 2017 16:32:22 +0100 Subject: [PATCH 049/406] s390/zcrypt: Fix wrong comparison leading to strange load balancing The function to decide if one zcrypt queue is better than another one compared two pointers instead of comparing the values where the pointers refer to. So within the same zcrypt card when load of each queue was equal just one queue was used. This effect only appears on relatively lite load, typically with one thread applications. This patch fixes the wrong comparison and now the counters show that requests are balanced equally over all available queues within the cards. There is no performance improvement coming with this fix. As long as the queue depth for an APQN queue is not touched, processing is not faster when requests are spread over queues within the same card hardware. So this fix only beautifies the lszcrypt counter printouts. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index b5f4006198b9e..a9a56aa9c26b7 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -218,8 +218,8 @@ static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq, weight += atomic_read(&zq->load); pref_weight += atomic_read(&pref_zq->load); if (weight == pref_weight) - return &zq->queue->total_request_count > - &pref_zq->queue->total_request_count; + return zq->queue->total_request_count > + pref_zq->queue->total_request_count; return weight > pref_weight; } From 8e138e0d92c6c9d3d481674fb14e3439b495be37 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Nov 2017 14:50:46 -0500 Subject: [PATCH 050/406] btrfs: clear space cache inode generation always We discovered a box that had double allocations, and suspected the space cache may be to blame. While auditing the write out path I noticed that if we've already setup the space cache we will just carry on. This means that any error we hit after cache_save_setup before we go to actually write the cache out we won't reset the inode generation, so whatever was already written will be considered correct, except it'll be stale. Fix this by _always_ resetting the generation on the block group inode, this way we only ever have valid or invalid cache. With this patch I was no longer able to reproduce cache corruption with dm-log-writes and my bpf error injection tool. Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 673ac4e01dd07..784d41e95ed95 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3502,13 +3502,6 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, goto again; } - /* We've already setup this transaction, go ahead and exit */ - if (block_group->cache_generation == trans->transid && - i_size_read(inode)) { - dcs = BTRFS_DC_SETUP; - goto out_put; - } - /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -3532,6 +3525,13 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, } WARN_ON(ret); + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + if (i_size_read(inode) > 0) { ret = btrfs_check_trunc_cache_free_space(fs_info, &fs_info->global_block_rsv); From 10809bb976648ac58194a629e3d7af99e7400297 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 15 Oct 2017 21:24:49 +0200 Subject: [PATCH 051/406] ACPI / bus: Leave modalias empty for devices which are not present Most Bay and Cherry Trail devices use a generic DSDT with all possible peripheral devices present in the DSDT, with their _STA returning 0x00 or 0x0f based on AML variables which describe what is actually present on the board. Since ACPI device objects with a 0x00 status (not present) still get an entry under /sys/bus/acpi/devices, and those entry had an acpi:PNPID modalias, userspace would end up loading modules for non present hardware. This commit fixes this by leaving the modalias empty for non present devices. This results in 10 modules less being loaded with a generic distro kernel config on my Cherry Trail test-device (a GPD pocket). Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 24418932612ee..a041689e5701d 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -146,6 +146,10 @@ static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, int count; struct acpi_hardware_id *id; + /* Avoid unnecessarily loading modules for non present devices. */ + if (!acpi_device_is_present(acpi_dev)) + return 0; + /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the From a64a62ce9a380213dc9e192f762266d70c9b40ec Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 26 Sep 2017 16:54:09 +0800 Subject: [PATCH 052/406] ACPI / EC: Fix regression related to PM ops support in ECDT device On platforms (ASUS X550ZE and possibly all ASUS X series) with valid ECDT EC but invalid DSDT EC, EC PM ops won't be invoked as ECDT EC is not an ACPI device. Thus the following commit actually removed post-resume acpi_ec_enable_event() invocation for such platforms, and triggered a regression on them that after being resumed, EC (actually should be ECDT) driver stops handling EC events: Commit: c2b46d679b30c5c0d7eb47a21085943242bdd8dc Subject: ACPI / EC: Add PM operations to improve event handling for resume process Notice that the root cause actually is "ECDT is not an ACPI device" rather than "the timing of acpi_ec_enable_event() invocation", this patch fixes this issue by enumerating ECDT EC as an ACPI device. Due to the existence of the noirq stage, the ability of tuning the timing of acpi_ec_enable_event() invocation is still meaningful. This patch is a little bit different from the posted fix by moving acpi_config_boot_ec() from acpi_ec_ecdt_start() to acpi_ec_add() to make sure that EC event handling won't be stopped as long as the ACPI EC driver is bound. Thus the following sequence shouldn't disable EC event handling: unbind,suspend,resume,bind. Fixes: c2b46d679b30 (ACPI / EC: Add PM operations to improve event handling for resume process) Link: https://bugzilla.kernel.org/show_bug.cgi?id=196847 Reported-by: Luya Tshimbalanga Tested-by: Luya Tshimbalanga Cc: 4.9+ # 4.9+ Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 69 ++++++++++++++++++++++++------------- drivers/acpi/internal.h | 1 + drivers/acpi/scan.c | 21 +++++++++++ include/acpi/acpi_bus.h | 1 + include/acpi/acpi_drivers.h | 1 + 5 files changed, 69 insertions(+), 24 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 82b3ce5e937e8..df842465634a9 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1597,32 +1597,41 @@ static int acpi_ec_add(struct acpi_device *device) { struct acpi_ec *ec = NULL; int ret; + bool is_ecdt = false; + acpi_status status; strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_EC_CLASS); - ec = acpi_ec_alloc(); - if (!ec) - return -ENOMEM; - if (ec_parse_device(device->handle, 0, ec, NULL) != - AE_CTRL_TERMINATE) { + if (!strcmp(acpi_device_hid(device), ACPI_ECDT_HID)) { + is_ecdt = true; + ec = boot_ec; + } else { + ec = acpi_ec_alloc(); + if (!ec) + return -ENOMEM; + status = ec_parse_device(device->handle, 0, ec, NULL); + if (status != AE_CTRL_TERMINATE) { ret = -EINVAL; goto err_alloc; + } } if (acpi_is_boot_ec(ec)) { - boot_ec_is_ecdt = false; - /* - * Trust PNP0C09 namespace location rather than ECDT ID. - * - * But trust ECDT GPE rather than _GPE because of ASUS quirks, - * so do not change boot_ec->gpe to ec->gpe. - */ - boot_ec->handle = ec->handle; - acpi_handle_debug(ec->handle, "duplicated.\n"); - acpi_ec_free(ec); - ec = boot_ec; - ret = acpi_config_boot_ec(ec, ec->handle, true, false); + boot_ec_is_ecdt = is_ecdt; + if (!is_ecdt) { + /* + * Trust PNP0C09 namespace location rather than + * ECDT ID. But trust ECDT GPE rather than _GPE + * because of ASUS quirks, so do not change + * boot_ec->gpe to ec->gpe. + */ + boot_ec->handle = ec->handle; + acpi_handle_debug(ec->handle, "duplicated.\n"); + acpi_ec_free(ec); + ec = boot_ec; + } + ret = acpi_config_boot_ec(ec, ec->handle, true, is_ecdt); } else ret = acpi_ec_setup(ec, true); if (ret) @@ -1635,8 +1644,10 @@ static int acpi_ec_add(struct acpi_device *device) ret = !!request_region(ec->command_addr, 1, "EC cmd"); WARN(!ret, "Could not request EC cmd io port 0x%lx", ec->command_addr); - /* Reprobe devices depending on the EC */ - acpi_walk_dep_device_list(ec->handle); + if (!is_ecdt) { + /* Reprobe devices depending on the EC */ + acpi_walk_dep_device_list(ec->handle); + } acpi_handle_debug(ec->handle, "enumerated.\n"); return 0; @@ -1692,6 +1703,7 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context) static const struct acpi_device_id ec_device_ids[] = { {"PNP0C09", 0}, + {ACPI_ECDT_HID, 0}, {"", 0}, }; @@ -1764,11 +1776,14 @@ static int __init acpi_ec_ecdt_start(void) * Note: ec->handle can be valid if this function is called after * acpi_ec_add(), hence the fast path. */ - if (boot_ec->handle != ACPI_ROOT_OBJECT) - handle = boot_ec->handle; - else if (!acpi_ec_ecdt_get_handle(&handle)) - return -ENODEV; - return acpi_config_boot_ec(boot_ec, handle, true, true); + if (boot_ec->handle == ACPI_ROOT_OBJECT) { + if (!acpi_ec_ecdt_get_handle(&handle)) + return -ENODEV; + boot_ec->handle = handle; + } + + /* Register to ACPI bus with PM ops attached */ + return acpi_bus_register_early_device(ACPI_BUS_TYPE_ECDT_EC); } #if 0 @@ -2020,6 +2035,12 @@ int __init acpi_ec_init(void) /* Drivers must be started after acpi_ec_query_init() */ dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver); + /* + * Register ECDT to ACPI bus only when PNP0C09 probe fails. This is + * useful for platforms (confirmed on ASUS X550ZE) with valid ECDT + * settings but invalid DSDT settings. + * https://bugzilla.kernel.org/show_bug.cgi?id=196847 + */ ecdt_fail = acpi_ec_ecdt_start(); return ecdt_fail && dsdt_fail ? -ENODEV : 0; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4361c4415b4f4..ede83d38beed5 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -115,6 +115,7 @@ bool acpi_device_is_present(const struct acpi_device *adev); bool acpi_device_is_battery(struct acpi_device *adev); bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev); +int acpi_bus_register_early_device(int type); /* -------------------------------------------------------------------------- Device Matching and Notification diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 602f8ff212f2c..2f2f50322ffb7 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1024,6 +1024,9 @@ static void acpi_device_get_busid(struct acpi_device *device) case ACPI_BUS_TYPE_SLEEP_BUTTON: strcpy(device->pnp.bus_id, "SLPF"); break; + case ACPI_BUS_TYPE_ECDT_EC: + strcpy(device->pnp.bus_id, "ECDT"); + break; default: acpi_get_name(device->handle, ACPI_SINGLE_NAME, &buffer); /* Clean up trailing underscores (if any) */ @@ -1304,6 +1307,9 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, case ACPI_BUS_TYPE_SLEEP_BUTTON: acpi_add_id(pnp, ACPI_BUTTON_HID_SLEEPF); break; + case ACPI_BUS_TYPE_ECDT_EC: + acpi_add_id(pnp, ACPI_ECDT_HID); + break; } } @@ -2049,6 +2055,21 @@ void acpi_bus_trim(struct acpi_device *adev) } EXPORT_SYMBOL_GPL(acpi_bus_trim); +int acpi_bus_register_early_device(int type) +{ + struct acpi_device *device = NULL; + int result; + + result = acpi_add_single_object(&device, NULL, + type, ACPI_STA_DEFAULT); + if (result) + return result; + + device->flags.match_driver = true; + return device_attach(&device->dev); +} +EXPORT_SYMBOL_GPL(acpi_bus_register_early_device); + static int acpi_bus_scan_fixed(void) { int result = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index fa1505292f6cd..324a04df3785b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -105,6 +105,7 @@ enum acpi_bus_device_type { ACPI_BUS_TYPE_THERMAL, ACPI_BUS_TYPE_POWER_BUTTON, ACPI_BUS_TYPE_SLEEP_BUTTON, + ACPI_BUS_TYPE_ECDT_EC, ACPI_BUS_DEVICE_TYPE_COUNT }; diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 29c691265b493..14499757338f6 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -58,6 +58,7 @@ #define ACPI_VIDEO_HID "LNXVIDEO" #define ACPI_BAY_HID "LNXIOBAY" #define ACPI_DOCK_HID "LNXDOCK" +#define ACPI_ECDT_HID "LNXEC" /* Quirk for broken IBM BIOSes */ #define ACPI_SMBUS_IBM_HID "SMBUSIBM" From f4359cedfb43b934f38c50d1604db21333abe57b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Nov 2017 16:03:01 +0100 Subject: [PATCH 053/406] drm/i915: Fix false-positive assert_rpm_wakelock_held in i915_pmic_bus_access_notifier v2 assert_rpm_wakelock_held is triggered from i915_pmic_bus_access_notifier even though it gets unregistered on (runtime) suspend, this is caused by a race happening under the following circumstances: intel_runtime_pm_put does: atomic_dec(&dev_priv->pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); And pm_runtime_put_autosuspend calls intel_runtime_suspend from a workqueue, so there is ample of time between the atomic_dec() and intel_runtime_suspend() unregistering the notifier. If the notifier gets called in this windowd assert_rpm_wakelock_held falsely triggers (at this point we're not runtime-suspended yet). This commit adds disable_rpm_wakeref_asserts and enable_rpm_wakeref_asserts calls around the intel_uncore_forcewake_get(FORCEWAKE_ALL) call in i915_pmic_bus_access_notifier fixing the false-positive WARN_ON. Changes in v2: -Reword comment explaining why disabling the wakeref asserts is ok and necessary Cc: stable@vger.kernel.org Reported-by: FKr Reviewed-by: Imre Deak Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171110150301.9601-2-hdegoede@redhat.com (cherry picked from commit ce30560c80dead91e98a03d90fb8791e57a9b69d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_uncore.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 20e3c65c0999f..cb48c8b92aa33 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1240,8 +1240,15 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, * bus, which will be busy after this notification, leading to: * "render: timed out waiting for forcewake ack request." * errors. + * + * The notifier is unregistered during intel_runtime_suspend(), + * so it's ok to access the HW here without holding a RPM + * wake reference -> disable wakeref asserts for the time of + * the access. */ + disable_rpm_wakeref_asserts(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + enable_rpm_wakeref_asserts(dev_priv); break; case MBI_PMIC_BUS_ACCESS_END: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); From 294cf1af8cf2eb0d1eced377fdfb9a2d3f0e8b42 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Nov 2017 14:55:17 +0100 Subject: [PATCH 054/406] drm/i915: Re-register PMIC bus access notifier on runtime resume intel_uncore_suspend() unregisters the uncore code's PMIC bus access notifier and gets called on both normal and runtime suspend. intel_uncore_resume_early() re-registers the notifier, but only on normal resume. Add a new intel_uncore_runtime_resume() function which only re-registers the notifier and call that on runtime resume. Cc: stable@vger.kernel.org Reported-by: Imre Deak Reviewed-by: Imre Deak Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171114135518.15981-2-hdegoede@redhat.com (cherry picked from commit bedf4d79c3654921839b62246b0965ddb308b201) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/intel_uncore.c | 6 ++++++ drivers/gpu/drm/i915/intel_uncore.h | 1 + 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3db5851756f06..34191028bbade 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2618,6 +2618,8 @@ static int intel_runtime_resume(struct device *kdev) ret = vlv_resume_prepare(dev_priv, true); } + intel_uncore_runtime_resume(dev_priv); + /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index cb48c8b92aa33..8c2ce81f01c2e 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -434,6 +434,12 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); } +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv) +{ + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); +} + void intel_uncore_sanitize(struct drm_i915_private *dev_priv) { i915_modparams.enable_rc6 = diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 582771251b57a..9ce079b5dd0d8 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -134,6 +134,7 @@ bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv void intel_uncore_fini(struct drm_i915_private *dev_priv); void intel_uncore_suspend(struct drm_i915_private *dev_priv); void intel_uncore_resume_early(struct drm_i915_private *dev_priv); +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv); u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); From dcd1d8302a06b0a0e5d6f9b6851fa4a5579e79b0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 16:55:34 +0100 Subject: [PATCH 055/406] drm/i915/gvt: ensure -ve return value is handled correctly An earlier fix changed the return type from find_bb_size however the integer return is being assigned to a unsigned int so the -ve error check will never be detected. Make bb_size an int to fix this. Detected by CoverityScan CID#1456886 ("Unsigned compared against 0") Fixes: 1e3197d6ad73 ("drm/i915/gvt: Refine error handling for perform_bb_shadow") Signed-off-by: Colin Ian King Signed-off-by: Zhenyu Wang (cherry picked from commit 24f8a29af4afe7c53e08f4afa0c3fa9eb3791b89) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 701a3c6f16696..85d4c57870fb7 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1628,7 +1628,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; unsigned long gma = 0; - uint32_t bb_size; + int bb_size; void *dst = NULL; int ret = 0; From 6e068270b7883836e4717e262d96d1c43690862a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 12:14:58 +0000 Subject: [PATCH 056/406] drm/i915: Clear breadcrumb node when cancelling signaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we call intel_engine_cancel_signaling() to stop reporting when a request is completed via an asynchronous signal, we remove that request from the breadcrumb wait queue. However, we may be concurrently processing that request in the signaler itself, the actual operations on the request's node itself are serialised but we do not actually clear the waiter after removing it from the tree allowing both parties to attempt to do so and corrupting the rbtree. (Previously removing from the breadcrumb wait queue could only be done on behalf of i915_wait_request, so this race could not happen). Reported-by: "He, Bo" Fixes: 9eb143bbec7d ("drm/i915: Allow a request to be cancelled") Signed-off-by: Chris Wilson Cc: "He, Bo" Cc: Tvrtko Ursulin Cc: Michał Winiarski Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171115121458.24655-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit c534612e780c4a2c8ef5bfc11583c7d58436baca) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 48e1ba01ccf88..5f8b9f1f40f19 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -517,6 +517,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); + RB_CLEAR_NODE(&wait->node); out: GEM_BUG_ON(b->irq_wait == wait); From 457db89b538ea4eb2a188c75f8f3a83469395ee0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 17:35:20 +0000 Subject: [PATCH 057/406] drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 21cc6431e0c2 ("drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM") tried to fixup the check_flush_dependency warning for hitting i915_gem_userptr_mn_invalidate_range_start from within the shrinker, but I failed to notice userptr has 2 similarly named workqueues. I marked up i915-userptr-acquire as WQ_MEM_RECLAIM whereas we only wait upon i915-userptr-release from inside the reclaim paths. [62530.869510] workqueue: PF_MEMALLOC task 7983(gem_shrink) is flushing !WQ_MEM_RECLAIM i915-userptr-release: (null) [62530.869515] ------------[ cut here ]------------ [62530.869519] WARNING: CPU: 1 PID: 7983 at kernel/workqueue.c:2434 check_flush_dependency+0x7f/0x110 [62530.869519] Modules linked in: pegasus mii ip6table_filter ip6_tables bnep iptable_filter snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic binfmt_misc nls_iso8859_1 intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_intel snd_hda_codec kvm_intel snd_hda_core snd_hwdep kvm snd_pcm irqbypass snd_seq_midi snd_seq_midi_event snd_rawmidi crct10dif_pclmul crc32_pclmul 8250_dw ghash_clmulni_intel snd_seq pcbc snd_seq_device snd_timer btusb aesni_intel btrtl btbcm aes_x86_64 iwlwifi btintel crypto_simd glue_helper cryptd bluetooth snd intel_cstate input_leds idma64 intel_rapl_perf ecdh_generic serio_raw soundcore cfg80211 wmi_bmof virt_dma intel_lpss_pci intel_lpss acpi_als kfifo_buf industrialio winbond_cir soc_button_array rc_core spidev tpm_crb intel_hid acpi_pad mac_hid sparse_keymap [62530.869546] parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid i915 i2c_algo_bit prime_numbers drm_kms_helper syscopyarea e1000e sysfillrect sysimgblt fb_sys_fops ahci ptp pps_core libahci drm wmi video i2c_hid hid [62530.869557] CPU: 1 PID: 7983 Comm: gem_shrink Tainted: G U W L 4.14.0-rc8-drm-tip-ww45-commit-1342299+ #1 [62530.869558] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake H DDR4 RVP, BIOS CNLSFWR1.R00.X098.A00.1707301945 07/30/2017 [62530.869559] task: ffffa1049dbeec80 task.stack: ffffae7d05c44000 [62530.869560] RIP: 0010:check_flush_dependency+0x7f/0x110 [62530.869561] RSP: 0018:ffffae7d05c473a0 EFLAGS: 00010286 [62530.869562] RAX: 000000000000006e RBX: ffffa1049540f400 RCX: ffffffffa3e55788 [62530.869562] RDX: 0000000000000000 RSI: 0000000000000092 RDI: 0000000000000202 [62530.869563] RBP: ffffae7d05c473c0 R08: 000000000000006e R09: 000000000038bb0e [62530.869563] R10: 0000000000000000 R11: 000000000000006e R12: ffffa1049dbeec80 [62530.869564] R13: 0000000000000000 R14: 0000000000000000 R15: ffffae7d05c473e0 [62530.869565] FS: 00007f621b129880(0000) GS:ffffa1050b240000(0000) knlGS:0000000000000000 [62530.869566] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [62530.869566] CR2: 00007f6214400000 CR3: 0000000353a17003 CR4: 00000000003606e0 [62530.869567] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [62530.869567] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [62530.869568] Call Trace: [62530.869570] flush_workqueue+0x115/0x3d0 [62530.869573] ? wake_up_process+0x15/0x20 [62530.869596] i915_gem_userptr_mn_invalidate_range_start+0x12f/0x160 [i915] [62530.869614] ? i915_gem_userptr_mn_invalidate_range_start+0x12f/0x160 [i915] [62530.869616] __mmu_notifier_invalidate_range_start+0x55/0x80 [62530.869618] try_to_unmap_one+0x791/0x8b0 [62530.869620] ? call_rwsem_down_read_failed+0x18/0x30 [62530.869622] rmap_walk_anon+0x10b/0x260 [62530.869624] rmap_walk+0x48/0x60 [62530.869625] try_to_unmap+0x93/0xf0 [62530.869626] ? page_remove_rmap+0x2a0/0x2a0 [62530.869627] ? page_not_mapped+0x20/0x20 [62530.869629] ? page_get_anon_vma+0x90/0x90 [62530.869630] ? invalid_mkclean_vma+0x20/0x20 [62530.869631] migrate_pages+0x946/0xaa0 [62530.869633] ? __ClearPageMovable+0x10/0x10 [62530.869635] ? isolate_freepages_block+0x3c0/0x3c0 [62530.869636] compact_zone+0x22f/0x970 [62530.869638] compact_zone_order+0xa3/0xd0 [62530.869640] try_to_compact_pages+0x1a5/0x2a0 [62530.869641] ? try_to_compact_pages+0x1a5/0x2a0 [62530.869643] __alloc_pages_direct_compact+0x50/0x110 [62530.869644] __alloc_pages_slowpath+0x4da/0xf30 [62530.869646] __alloc_pages_nodemask+0x262/0x280 [62530.869648] alloc_pages_vma+0x165/0x1e0 [62530.869649] shmem_alloc_hugepage+0xd0/0x130 [62530.869651] ? __radix_tree_insert+0x45/0x230 [62530.869652] ? __vm_enough_memory+0x29/0x130 [62530.869654] shmem_alloc_and_acct_page+0x10d/0x1e0 [62530.869655] shmem_getpage_gfp+0x426/0xc00 [62530.869657] shmem_fault+0xa0/0x1e0 [62530.869659] ? file_update_time+0x60/0x110 [62530.869660] __do_fault+0x1e/0xc0 [62530.869661] __handle_mm_fault+0xa35/0x1170 [62530.869662] handle_mm_fault+0xcc/0x1c0 [62530.869664] __do_page_fault+0x262/0x4f0 [62530.869666] do_page_fault+0x2e/0xe0 [62530.869667] page_fault+0x22/0x30 [62530.869668] RIP: 0033:0x404335 [62530.869669] RSP: 002b:00007fff7829e420 EFLAGS: 00010216 [62530.869670] RAX: 00007f6210400000 RBX: 0000000000000004 RCX: 0000000000b80000 [62530.869670] RDX: 0000000000002e01 RSI: 0000000000008000 RDI: 0000000000000004 [62530.869671] RBP: 0000000000000019 R08: 0000000000000002 R09: 0000000000000000 [62530.869671] R10: 0000000000000559 R11: 0000000000000246 R12: 0000000008000000 [62530.869672] R13: 00000000004042f0 R14: 0000000000000004 R15: 000000000000007e [62530.869673] Code: 00 8b b0 18 05 00 00 48 8d 8b b0 00 00 00 48 8d 90 c0 06 00 00 4d 89 f0 48 c7 c7 40 c0 c8 a3 c6 05 68 c5 e8 00 01 e8 c2 68 04 00 <0f> ff 4d 85 ed 74 18 49 8b 45 20 48 8b 70 08 8b 86 00 01 00 00 [62530.869691] ---[ end trace 01e01ad0ff5781f8 ]--- Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103739 Fixes: 21cc6431e0c2 ("drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM") Signed-off-by: Chris Wilson Cc: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171114173520.8829-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld (cherry picked from commit 41729bf2248bc8593e5103d43974079cc269524c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_userptr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e26b23171b56f..ccde12d9e5f54 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -172,7 +172,9 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; mn->objects = RB_ROOT_CACHED; - mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); + mn->wq = alloc_workqueue("i915-userptr-release", + WQ_UNBOUND | WQ_MEM_RECLAIM, + 0); if (mn->wq == NULL) { kfree(mn); return ERR_PTR(-ENOMEM); @@ -827,7 +829,7 @@ int i915_gem_init_userptr(struct drm_i915_private *dev_priv) dev_priv->mm.userptr_wq = alloc_workqueue("i915-userptr-acquire", - WQ_HIGHPRI | WQ_MEM_RECLAIM, + WQ_HIGHPRI | WQ_UNBOUND, 0); if (!dev_priv->mm.userptr_wq) return -ENOMEM; From 3572f04c69ed4369da5d3c65d84fb18774aa60b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 16 Nov 2017 18:02:15 +0200 Subject: [PATCH 058/406] drm/i915: Fix init_clock_gating for resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving the init_clock_gating() call from intel_modeset_init_hw() to intel_modeset_gem_init() had an unintended effect of not applying some workarounds on resume. This, for example, cause some kind of corruption to appear at the top of my IVB Thinkpad X1 Carbon LVDS screen after hibernation. Fix the problem by explicitly calling init_clock_gating() from the resume path. I really hope this doesn't break something else again. At least the problems reported at https://bugs.freedesktop.org/show_bug.cgi?id=103549 didn't make a comeback, even after a hibernate cycle. v2: Reorder the init_clock_gating vs. modeset_init_hw to match the display reset path (Rodrigo) Cc: stable@vger.kernel.org Cc: Chris Wilson Cc: Rodrigo Vivi Fixes: 6ac43272768c ("drm/i915: Move init_clock_gating() back to where it was") Reviewed-by: Rodrigo Vivi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171116160215.25715-1-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit 675f7ff35bd256e65d3d0f52718d8babf5d1002a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 34191028bbade..7d9b07df32fad 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1714,6 +1714,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_guc_resume(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) From 0d307935fefa6389eb726c6362351c162c949101 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 15 Nov 2017 21:17:55 +0000 Subject: [PATCH 059/406] cpufreq: Add Loongson machine dependencies The MIPS loongson cpufreq drivers don't build unless configured for the correct machine type, due to dependency on machine specific architecture headers and symbols in machine specific platform code. More specifically loongson1-cpufreq.c uses RST_CPU_EN and RST_CPU, neither of which is defined in asm/mach-loongson32/regs-clk.h unless CONFIG_LOONGSON1_LS1B=y, and loongson2_cpufreq.c references loongson2_clockmod_table[], which is only defined in arch/mips/loongson64/lemote-2f/clock.c, i.e. when CONFIG_LEMOTE_MACH2F=y. Add these dependencies to Kconfig to avoid randconfig / allyesconfig build failures (e.g. when based on BMIPS which also has a cpufreq driver). Signed-off-by: James Hogan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 4ebae43118eff..d8addbce40bcc 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -275,6 +275,7 @@ config BMIPS_CPUFREQ config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" + depends on LEMOTE_MACH2F help This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. @@ -287,6 +288,7 @@ config LOONGSON2_CPUFREQ config LOONGSON1_CPUFREQ tristate "Loongson1 CPUFreq Driver" + depends on LOONGSON1_LS1B help This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. From 7e8a09e05a0f0ccaf6b2a16f12ed6edc8e62c47d Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 13:32:01 -0800 Subject: [PATCH 060/406] cpufreq: mediatek: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/cpufreq/mediatek-cpufreq.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/mediatek-cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 18c4bd9a5c656..e0d5090b303dd 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -620,3 +620,7 @@ static int __init mtk_cpufreq_driver_init(void) return 0; } device_initcall(mtk_cpufreq_driver_init); + +MODULE_DESCRIPTION("MediaTek CPUFreq driver"); +MODULE_AUTHOR("Pi-Cheng Chen "); +MODULE_LICENSE("GPL v2"); From 1addb798e93893d33c8dfab743cd44f09fd7719a Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 8 Nov 2017 17:29:44 +0100 Subject: [PATCH 061/406] null_blk: fix dev->badblocks leak null_alloc_dev() allocates memory for dev->badblocks, but cleanup currently only occurs in the configfs release codepath, missing a number of other places. This bug was found running the blktests block/010 test, alongside kmemleak: rapido1:/blktests# ./check block/010 ... rapido1:/blktests# echo scan > /sys/kernel/debug/kmemleak [ 306.966708] kmemleak: 32 new suspected memory leaks (see /sys/kernel/debug/kmemleak) rapido1:/blktests# cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88001f86d000 (size 4096): comm "modprobe", pid 231, jiffies 4294892415 (age 318.252s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0x9f/0xe0 [] badblocks_init+0x2f/0x60 [] 0xffffffffa0019fae [] nullb_device_badblocks_store+0x63/0x130 [null_blk] [] do_one_initcall+0x3d/0x170 [] do_init_module+0x56/0x1e9 [] load_module+0x1c47/0x26a0 [] SyS_finit_module+0xa9/0xd0 [] entry_SYSCALL_64_fastpath+0x13/0x94 Fixes: 2f54a613c942 ("nullb: badbblocks support") Reviewed-by: Shaohua Li Signed-off-by: David Disseldorp Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index c61960deb74aa..ccb9975a97fa3 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -471,7 +471,6 @@ static void nullb_device_release(struct config_item *item) { struct nullb_device *dev = to_nullb_device(item); - badblocks_exit(&dev->badblocks); null_free_device_storage(dev, false); null_free_dev(dev); } @@ -582,6 +581,10 @@ static struct nullb_device *null_alloc_dev(void) static void null_free_dev(struct nullb_device *dev) { + if (!dev) + return; + + badblocks_exit(&dev->badblocks); kfree(dev); } From f341a4d384f7fced2ca0d9472ed88fe94de32726 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 22 Nov 2017 13:18:05 -0500 Subject: [PATCH 062/406] block: remove useless assignment in bio_split Remove useless assignment to the variable "split" because the variable is unconditionally assigned later. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 228229f3bb76d..8bfdea58159ba 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1819,7 +1819,7 @@ EXPORT_SYMBOL(bio_endio); struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { - struct bio *split = NULL; + struct bio *split; BUG_ON(sectors <= 0); BUG_ON(sectors >= bio_sectors(bio)); From ded13fc11b71fd1351e57c68a130d89a0285f1b6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 22 Nov 2017 14:38:53 +1100 Subject: [PATCH 063/406] KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts This fixes two errors that prevent a guest using the HPT MMU from successfully migrating to a POWER9 host in radix MMU mode, or resizing its HPT when running on a radix host. The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page size information", 2017-09-11) missed two uses of hpte_base_page_size(), one in the HPT rehashing code and one in kvm_htab_write() (which is used on the destination side in migrating a HPT guest). Instead we use kvmppc_hpte_base_page_shift(). Having the shift count means that we can use left and right shifts instead of multiplication and division in a few places. Along the way, this adds a check in kvm_htab_write() to ensure that the page size encoding in the incoming HPTEs is recognized, and if not return an EINVAL error to userspace. The second bug was that kvm_htab_write was performing some but not all of the functions of kvmhv_setup_mmu(), resulting in the destination VM being left in radix mode as far as the hardware is concerned. The simplest fix for now is make kvm_htab_write() call kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does. In future it would be better to refactor the code more extensively to remove the duplication. Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page size information") Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather than SDR1 on POWER9") Reported-by: Suraj Jitindar Singh Tested-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 ++++++++++++++++++----------- arch/powerpc/kvm/book3s_hv.c | 3 +-- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 96753f3aac6dd..941c2a3f231b9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct iommu_group *grp); extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm); extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); +extern void kvmppc_setup_partition_table(struct kvm *kvm); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 235319c2574e0..966097232d214 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1238,8 +1238,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, unsigned long vpte, rpte, guest_rpte; int ret; struct revmap_entry *rev; - unsigned long apsize, psize, avpn, pteg, hash; + unsigned long apsize, avpn, pteg, hash; unsigned long new_idx, new_pteg, replace_vpte; + int pshift; hptep = (__be64 *)(old->virt + (idx << 4)); @@ -1298,8 +1299,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, goto out; rpte = be64_to_cpu(hptep[1]); - psize = hpte_base_page_size(vpte, rpte); - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); + pshift = kvmppc_hpte_base_page_shift(vpte, rpte); + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23); pteg = idx / HPTES_PER_GROUP; if (vpte & HPTE_V_SECONDARY) pteg = ~pteg; @@ -1311,20 +1312,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, offset = (avpn & 0x1f) << 23; vsid = avpn >> 5; /* We can find more bits from the pteg value */ - if (psize < (1ULL << 23)) - offset |= ((vsid ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (offset / psize); + hash = vsid ^ (offset >> pshift); } else { unsigned long offset, vsid; /* We only have 40 - 23 bits of seg_off in avpn */ offset = (avpn & 0x1ffff) << 23; vsid = avpn >> 17; - if (psize < (1ULL << 23)) - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (vsid << 25) ^ (offset / psize); + hash = vsid ^ (vsid << 25) ^ (offset >> pshift); } new_pteg = hash & new_hash_mask; @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, ssize_t nb; long int err, ret; int mmu_ready; + int pshift; if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; + pshift = kvmppc_hpte_base_page_shift(v, r); + if (pshift <= 0) + goto out; lbuf += 2; nb += HPTE_SIZE; @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!mmu_ready && is_vrma_hpte(v)) { - unsigned long psize = hpte_base_page_size(v, r); - unsigned long senc = slb_pgsize_encoding(psize); - unsigned long lpcr; + unsigned long senc, lpcr; + senc = slb_pgsize_encoding(1ul << pshift); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr = senc << (LPCR_VRMASD_SH - 4); - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, + LPCR_VRMASD); + } else { + kvmppc_setup_partition_table(kvm); + } mmu_ready = 1; } ++i; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 79ea3d9269dbf..2d46037ce9366 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); -static void kvmppc_setup_partition_table(struct kvm *kvm); static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, int *ip) @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) return; } -static void kvmppc_setup_partition_table(struct kvm *kvm) +void kvmppc_setup_partition_table(struct kvm *kvm) { unsigned long dw0, dw1; From 8c97eeccf0ad8783c057830119467b877bdfced7 Mon Sep 17 00:00:00 2001 From: Jeff Lien Date: Tue, 21 Nov 2017 10:44:37 -0600 Subject: [PATCH 064/406] nvme-pci: add quirk for delay before CHK RDY for WDC SN200 And increase the existing delay to cover this device as well. Cc: stable@vger.kernel.org Signed-off-by: Jeff Lien Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/pci.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c0873a68872fb..ea1aa5283e8ed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -114,7 +114,7 @@ static inline struct nvme_request *nvme_req(struct request *req) * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was * found empirically. */ -#define NVME_QUIRK_DELAY_AMOUNT 2000 +#define NVME_QUIRK_DELAY_AMOUNT 2300 enum nvme_ctrl_state { NVME_CTRL_NEW, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 58dbe684007bd..617374762b7c5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2673,6 +2673,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ From b8a3365a30c463e9105969ab1bf8674b763e3408 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 23 Nov 2017 11:37:37 +0100 Subject: [PATCH 065/406] drm/vblank: Pass crtc_id to page_flip_ioctl. We added crtc_id to the atomic ioctl, but forgot to add it for vblank and page flip events. Commit bd386e518056 ("drm: Reorganize drm_pending_event to support future event types [v2]") added it to the vblank event, but page flip event was still missing. Correct this and add a test for making sure we always set crtc_id correctly. Fixes: bd386e518056 ("drm: Reorganize drm_pending_event to support future event types [v2]") Fixes: 5db06a8a98f5 ("drm: Pass CRTC ID in userspace vblank events") Cc: Daniel Stone Cc: Daniel Vetter Cc: Gustavo Padovan Cc: Sean Paul Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Reviewed-by: Daniel Vetter #irc Testcase: igt/kms_vblank/crtc_id Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171123103737.47138-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/drm_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 19404e34cd592..37a93cdffb4ad 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1030,6 +1030,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); e->event.vbl.user_data = page_flip->user_data; + e->event.vbl.crtc_id = crtc->base.id; ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); From 12841f87b7a8ceb3d54f171660f72a86941bfcb3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 23 Nov 2017 09:08:57 +0530 Subject: [PATCH 066/406] cxl: Check if vphb exists before iterating over AFU devices During an eeh a kernel-oops is reported if no vPHB is allocated to the AFU. This happens as during AFU init, an error in creation of vPHB is a non-fatal error. Hence afu->phb should always be checked for NULL before iterating over it for the virtual AFU pci devices. This patch fixes the kenel-oops by adding a NULL pointer check for afu->phb before it is dereferenced. Fixes: 9e8df8a21963 ("cxl: EEH support") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Vaibhav Jain Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index bb7fd3f4edab7..19969ee86d6f7 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -2083,6 +2083,9 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, /* There should only be one entry, but go through the list * anyway */ + if (afu->phb == NULL) + return result; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (!afu_dev->driver) continue; @@ -2124,8 +2127,7 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, * Tell the AFU drivers; but we don't care what they * say, we're going away. */ - if (afu->phb != NULL) - cxl_vphb_error_detected(afu, state); + cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; } @@ -2265,6 +2267,9 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) if (cxl_afu_select_best_mode(afu)) goto err; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { /* Reset the device context. * TODO: make this less disruptive @@ -2327,6 +2332,9 @@ static void cxl_pci_resume(struct pci_dev *pdev) for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (afu_dev->driver && afu_dev->driver->err_handler && afu_dev->driver->err_handler->resume) From a3961f824cdbe7eb431254dc7d8f6f6767f474aa Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 22 Nov 2017 23:02:07 +0530 Subject: [PATCH 067/406] powerpc/powernv: Fix kexec crashes caused by tlbie tracing Rebooting into a new kernel with kexec fails in trace_tlbie() which is called from native_hpte_clear(). This happens if the running kernel has CONFIG_LOCKDEP enabled. With lockdep enabled, the tracepoints always execute few RCU checks regardless of whether tracing is on or off. We are already in the last phase of kexec sequence in real mode with HILE_BE set. At this point the RCU check ends up in RCU_LOCKDEP_WARN and causes kexec to fail. Fix this by not calling trace_tlbie() from native_hpte_clear(). mpe: It's not safe to call trace points at this point in the kexec path, even if we could avoid the RCU checks/warnings. The only solution is to not call them. Fixes: 0428491cba92 ("powerpc/mm: Trace tlbie(l) instructions") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Mahesh Salgaonkar Reported-by: Aneesh Kumar K.V Suggested-by: Michael Ellerman Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 3848af167df9d..640cf566e9865 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -47,7 +47,8 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +static inline unsigned long ___tlbie(unsigned long vpn, int psize, + int apsize, int ssize) { unsigned long va; unsigned int penc; @@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } - trace_tlbie(0, 0, va, 0, 0, 0, 0); + return va; +} + +static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + unsigned long rb; + + rb = ___tlbie(vpn, psize, apsize, ssize); + trace_tlbie(0, 0, rb, 0, 0, 0, 0); } static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) @@ -652,7 +661,7 @@ static void native_hpte_clear(void) if (hpte_v & HPTE_V_VALID) { hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn); hptep->v = 0; - __tlbie(vpn, psize, apsize, ssize); + ___tlbie(vpn, psize, apsize, ssize); } } From 34c089e806793a66e450b11bd167db6047399fcd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:27 +0200 Subject: [PATCH 068/406] mmc: block: Fix missing blk_put_request() Ensure blk_get_request() is paired with blk_put_request(). Fixes: 0493f6fe5bde ("mmc: block: Move boot partition locking into a driver op") Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ea80ff4cd7f99..f609398585865 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -236,6 +236,7 @@ static ssize_t power_ro_lock_store(struct device *dev, req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; + blk_put_request(req); if (!ret) { pr_info("%s: Locking boot partition ro until next power on\n", @@ -2557,6 +2558,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val) *val = ret; ret = 0; } + blk_put_request(req); return ret; } @@ -2587,6 +2589,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) req_to_mmc_queue_req(req)->drv_op_data = &ext_csd; blk_execute_rq(mq->queue, NULL, req, 0); err = req_to_mmc_queue_req(req)->drv_op_result; + blk_put_request(req); if (err) { pr_err("FAILED %d\n", err); goto out_free; From fb8e456e547ed2c699f64665bd8a3b9bde7b9728 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:28 +0200 Subject: [PATCH 069/406] mmc: block: Check return value of blk_get_request() blk_get_request() can fail, always check the return value. Fixes: 0493f6fe5bde ("mmc: block: Move boot partition locking into a driver op") Fixes: 3ecd8cf23f88 ("mmc: block: move multi-ioctl() to use block layer") Fixes: 614f0388f580 ("mmc: block: move single ioctl() commands to block requests") Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f609398585865..4a319ddbd956d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -233,6 +233,10 @@ static ssize_t power_ro_lock_store(struct device *dev, /* Dispatch locking to the block layer */ req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, __GFP_RECLAIM); + if (IS_ERR(req)) { + count = PTR_ERR(req); + goto out_put; + } req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; @@ -249,7 +253,7 @@ static ssize_t power_ro_lock_store(struct device *dev, set_disk_ro(part_md->disk, 1); } } - +out_put: mmc_blk_put(md); return count; } @@ -625,6 +629,10 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, req = blk_get_request(mq->queue, idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto cmd_done; + } idatas[0] = idata; req_to_mmc_queue_req(req)->drv_op = rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; @@ -692,6 +700,10 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, req = blk_get_request(mq->queue, idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto cmd_err; + } req_to_mmc_queue_req(req)->drv_op = rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; req_to_mmc_queue_req(req)->drv_op_data = idata; @@ -2551,6 +2563,8 @@ static int mmc_dbg_card_status_get(void *data, u64 *val) /* Ask the block layer about the card status */ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) + return PTR_ERR(req); req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; @@ -2585,6 +2599,10 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) /* Ask the block layer for the EXT CSD */ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out_free; + } req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD; req_to_mmc_queue_req(req)->drv_op_data = &ext_csd; blk_execute_rq(mq->queue, NULL, req, 0); From ebe7dd45cf49e3b49cacbaace17f9f878f21fbea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:29 +0200 Subject: [PATCH 070/406] mmc: core: Do not leave the block driver in a suspended state The block driver must be resumed if the mmc bus fails to suspend the card. Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index a4b49e25fe963..7586ff2ad1f17 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -157,6 +157,9 @@ static int mmc_bus_suspend(struct device *dev) return ret; ret = host->bus_ops->suspend(host); + if (ret) + pm_generic_resume(dev); + return ret; } From f9f0da98819503b06b35e61869d18cf3a8cd3323 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:30 +0200 Subject: [PATCH 071/406] mmc: block: Ensure that debugfs files are removed The card is not necessarily being removed, but the debugfs files must be removed when the driver is removed, otherwise they will continue to exist after unbinding the card from the driver. e.g. # echo "mmc1:0001" > /sys/bus/mmc/drivers/mmcblk/unbind # cat /sys/kernel/debug/mmc1/mmc1\:0001/ext_csd [ 173.634584] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [ 173.643356] IP: mmc_ext_csd_open+0x5e/0x170 A complication is that the debugfs_root may have already been removed, so check for that too. Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 44 ++++++++++++++++++++++++++++++++------ drivers/mmc/core/debugfs.c | 1 + 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4a319ddbd956d..ccfa98af1dd3f 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -122,6 +122,10 @@ struct mmc_blk_data { struct device_attribute force_ro; struct device_attribute power_ro_lock; int area_type; + + /* debugfs files (only in main mmc_blk_data) */ + struct dentry *status_dentry; + struct dentry *ext_csd_dentry; }; /* Device type for RPMB character devices */ @@ -2653,7 +2657,7 @@ static const struct file_operations mmc_dbg_ext_csd_fops = { .llseek = default_llseek, }; -static int mmc_blk_add_debugfs(struct mmc_card *card) +static int mmc_blk_add_debugfs(struct mmc_card *card, struct mmc_blk_data *md) { struct dentry *root; @@ -2663,28 +2667,53 @@ static int mmc_blk_add_debugfs(struct mmc_card *card) root = card->debugfs_root; if (mmc_card_mmc(card) || mmc_card_sd(card)) { - if (!debugfs_create_file("status", S_IRUSR, root, card, - &mmc_dbg_card_status_fops)) + md->status_dentry = + debugfs_create_file("status", S_IRUSR, root, card, + &mmc_dbg_card_status_fops); + if (!md->status_dentry) return -EIO; } if (mmc_card_mmc(card)) { - if (!debugfs_create_file("ext_csd", S_IRUSR, root, card, - &mmc_dbg_ext_csd_fops)) + md->ext_csd_dentry = + debugfs_create_file("ext_csd", S_IRUSR, root, card, + &mmc_dbg_ext_csd_fops); + if (!md->ext_csd_dentry) return -EIO; } return 0; } +static void mmc_blk_remove_debugfs(struct mmc_card *card, + struct mmc_blk_data *md) +{ + if (!card->debugfs_root) + return; + + if (!IS_ERR_OR_NULL(md->status_dentry)) { + debugfs_remove(md->status_dentry); + md->status_dentry = NULL; + } + + if (!IS_ERR_OR_NULL(md->ext_csd_dentry)) { + debugfs_remove(md->ext_csd_dentry); + md->ext_csd_dentry = NULL; + } +} #else -static int mmc_blk_add_debugfs(struct mmc_card *card) +static int mmc_blk_add_debugfs(struct mmc_card *card, struct mmc_blk_data *md) { return 0; } +static void mmc_blk_remove_debugfs(struct mmc_card *card, + struct mmc_blk_data *md) +{ +} + #endif /* CONFIG_DEBUG_FS */ static int mmc_blk_probe(struct mmc_card *card) @@ -2724,7 +2753,7 @@ static int mmc_blk_probe(struct mmc_card *card) } /* Add two debugfs entries */ - mmc_blk_add_debugfs(card); + mmc_blk_add_debugfs(card, md); pm_runtime_set_autosuspend_delay(&card->dev, 3000); pm_runtime_use_autosuspend(&card->dev); @@ -2750,6 +2779,7 @@ static void mmc_blk_remove(struct mmc_card *card) { struct mmc_blk_data *md = dev_get_drvdata(&card->dev); + mmc_blk_remove_debugfs(card, md); mmc_blk_remove_parts(card, md); pm_runtime_get_sync(&card->dev); mmc_claim_host(card->host); diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 01e459a34f332..0f4a7d7b26261 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -314,4 +314,5 @@ void mmc_add_card_debugfs(struct mmc_card *card) void mmc_remove_card_debugfs(struct mmc_card *card) { debugfs_remove_recursive(card->debugfs_root); + card->debugfs_root = NULL; } From 52884f8f66c893e92a73787b5bef32b2ef52e1bc Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 20 Nov 2017 11:56:47 -0800 Subject: [PATCH 072/406] mmc: sdhci-msm: Optionally wait for signal level changes Not all instances of the SDCC core supports changing signal voltage and as such will not generate a power interrupt when the software attempts to change the voltage. This results in probing the eMMC on some devices to take over 2 minutes. Check that the SWITCHABLE_SIGNALING_VOLTAGE bit in MCI_GENERICS is set before waiting for the power interrupt. Cc: Sahitya Tummala Cc: Vijay Viswanath Fixes: c0309b3803fe ("mmc: sdhci-msm: Add sdhci msm register write APIs which wait for pwr irq") Signed-off-by: Bjorn Andersson Tested-by: Luca Weiss Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 3fb7d2eec93f4..c283291db7052 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -29,6 +29,9 @@ #define CORE_VERSION_MAJOR_MASK (0xf << CORE_VERSION_MAJOR_SHIFT) #define CORE_VERSION_MINOR_MASK 0xff +#define CORE_MCI_GENERICS 0x70 +#define SWITCHABLE_SIGNALING_VOLTAGE BIT(29) + #define CORE_HC_MODE 0x78 #define HC_MODE_EN 0x1 #define CORE_POWER 0x0 @@ -1028,11 +1031,22 @@ static void sdhci_msm_check_power_status(struct sdhci_host *host, u32 req_type) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); bool done = false; + u32 val; pr_debug("%s: %s: request %d curr_pwr_state %x curr_io_level %x\n", mmc_hostname(host->mmc), __func__, req_type, msm_host->curr_pwr_state, msm_host->curr_io_level); + /* + * The power interrupt will not be generated for signal voltage + * switches if SWITCHABLE_SIGNALING_VOLTAGE in MCI_GENERICS is not set. + */ + val = readl(msm_host->core_mem + CORE_MCI_GENERICS); + if ((req_type & REQ_IO_HIGH || req_type & REQ_IO_LOW) && + !(val & SWITCHABLE_SIGNALING_VOLTAGE)) { + return; + } + /* * The IRQ for request type IO High/LOW will be generated when - * there is a state change in 1.8V enable bit (bit 3) of From 33d22c2ed6dbe1199fc78e84549b1117fd652c01 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 23 Nov 2017 12:12:17 +1000 Subject: [PATCH 073/406] drm/ttm: don't attempt to use hugepages if dma32 requested (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit below introduced thp support for ttm allocations, however it didn't take into account the case where dma32 was requested. Some drivers always request dma32, and the bochs driver is one of those. This fixes an oops: [ 30.108507] ------------[ cut here ]------------ [ 30.108920] kernel BUG at ./include/linux/gfp.h:408! [ 30.109356] invalid opcode: 0000 [#1] SMP [ 30.109700] Modules linked in: fuse nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack devlink ip_set nfnetlink ebtable_nat ebtable_broute bridge ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security ebtable_filter ebtables ip6table_filter ip6_tables snd_hda_codec_generic kvm_intel kvm snd_hda_intel snd_hda_codec irqbypass ppdev snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm bochs_drm ttm joydev drm_kms_helper virtio_balloon snd_timer snd parport_pc drm soundcore parport i2c_piix4 nls_utf8 isofs squashfs zstd_decompress xxhash 8021q garp mrp stp llc virtio_net [ 30.115605] virtio_console virtio_scsi crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel serio_raw virtio_pci virtio_ring virtio ata_generic pata_acpi qemu_fw_cfg sunrpc scsi_transport_iscsi loop [ 30.117425] CPU: 0 PID: 1347 Comm: gnome-shell Not tainted 4.15.0-0.rc0.git6.1.fc28.x86_64 #1 [ 30.118141] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 30.118866] task: ffff923a77e03380 task.stack: ffffa78182228000 [ 30.119366] RIP: 0010:__alloc_pages_nodemask+0x35e/0x430 [ 30.119810] RSP: 0000:ffffa7818222bba8 EFLAGS: 00010202 [ 30.120250] RAX: 0000000000000001 RBX: 00000000014382c6 RCX: 0000000000000006 [ 30.120840] RDX: 0000000000000000 RSI: 0000000000000009 RDI: 0000000000000000 [ 30.121443] RBP: ffff923a760d6000 R08: 0000000000000000 R09: 0000000000000006 [ 30.122039] R10: 0000000000000040 R11: 0000000000000300 R12: ffff923a729273c0 [ 30.122629] R13: 0000000000000000 R14: 0000000000000000 R15: ffff923a7483d400 [ 30.123223] FS: 00007fe48da7dac0(0000) GS:ffff923a7cc00000(0000) knlGS:0000000000000000 [ 30.123896] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 30.124373] CR2: 00007fe457b73000 CR3: 0000000078313000 CR4: 00000000000006f0 [ 30.124968] Call Trace: [ 30.125186] ttm_pool_populate+0x19b/0x400 [ttm] [ 30.125578] ttm_bo_vm_fault+0x325/0x570 [ttm] [ 30.125964] __do_fault+0x19/0x11e [ 30.126255] __handle_mm_fault+0xcd3/0x1260 [ 30.126609] handle_mm_fault+0x14c/0x310 [ 30.126947] __do_page_fault+0x28c/0x530 [ 30.127282] do_page_fault+0x32/0x270 [ 30.127593] async_page_fault+0x22/0x30 [ 30.127922] RIP: 0033:0x7fe48aae39a8 [ 30.128225] RSP: 002b:00007ffc21c4d928 EFLAGS: 00010206 [ 30.128664] RAX: 00007fe457b73000 RBX: 000055cd4c1041a0 RCX: 00007fe457b73040 [ 30.129259] RDX: 0000000000300000 RSI: 0000000000000000 RDI: 00007fe457b73000 [ 30.129855] RBP: 0000000000000300 R08: 000000000000000c R09: 0000000100000000 [ 30.130457] R10: 0000000000000001 R11: 0000000000000246 R12: 000055cd4c1041a0 [ 30.131054] R13: 000055cd4bdfe990 R14: 000055cd4c104110 R15: 0000000000000400 [ 30.131648] Code: 11 01 00 0f 84 a9 00 00 00 65 ff 0d 6d cc dd 44 e9 0f ff ff ff 40 80 cd 80 e9 99 fe ff ff 48 89 c7 e8 e7 f6 01 00 e9 b7 fe ff ff <0f> 0b 0f ff e9 40 fd ff ff 65 48 8b 04 25 80 d5 00 00 8b 40 4c [ 30.133245] RIP: __alloc_pages_nodemask+0x35e/0x430 RSP: ffffa7818222bba8 [ 30.133836] ---[ end trace d4f1deb60784f40a ]--- v2: handle free path as well. Reported-by: Laura Abbott Reported-by: Adam Williamson Fixes: 0284f1ead87463bc17cf5e81a24fc65c052486f3 (drm/ttm: add transparent huge page support for cached allocations v2) Reviewed-by: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 36 +++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 316f831ad5f04..b0551aa677b82 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -744,12 +744,14 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, } #ifdef CONFIG_TRANSPARENT_HUGEPAGE - for (j = 0; j < HPAGE_PMD_NR; ++j) - if (p++ != pages[i + j]) - break; + if (!(flags & TTM_PAGE_FLAG_DMA32)) { + for (j = 0; j < HPAGE_PMD_NR; ++j) + if (p++ != pages[i + j]) + break; - if (j == HPAGE_PMD_NR) - order = HPAGE_PMD_ORDER; + if (j == HPAGE_PMD_NR) + order = HPAGE_PMD_ORDER; + } #endif if (page_count(pages[i]) != 1) @@ -865,20 +867,22 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags, i = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - while (npages >= HPAGE_PMD_NR) { - gfp_t huge_flags = gfp_flags; + if (!(gfp_flags & GFP_DMA32)) { + while (npages >= HPAGE_PMD_NR) { + gfp_t huge_flags = gfp_flags; - huge_flags |= GFP_TRANSHUGE; - huge_flags &= ~__GFP_MOVABLE; - huge_flags &= ~__GFP_COMP; - p = alloc_pages(huge_flags, HPAGE_PMD_ORDER); - if (!p) - break; + huge_flags |= GFP_TRANSHUGE; + huge_flags &= ~__GFP_MOVABLE; + huge_flags &= ~__GFP_COMP; + p = alloc_pages(huge_flags, HPAGE_PMD_ORDER); + if (!p) + break; - for (j = 0; j < HPAGE_PMD_NR; ++j) - pages[i++] = p++; + for (j = 0; j < HPAGE_PMD_NR; ++j) + pages[i++] = p++; - npages -= HPAGE_PMD_NR; + npages -= HPAGE_PMD_NR; + } } #endif From 612ea091fc77770d659b82ea44a1d5646e0af54c Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:03 +0800 Subject: [PATCH 074/406] blk-wbt: remove duplicated setting in wbt_init rwb->wc and rwb->queue_depth were overwritten by wbt_set_write_cache and wbt_set_queue_depth, remove the default setting. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index b252da0e4c110..ec903532a5d1c 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -723,8 +723,6 @@ int wbt_init(struct request_queue *q) init_waitqueue_head(&rwb->rq_wait[i].wait); } - rwb->wc = 1; - rwb->queue_depth = RWB_DEF_DEPTH; rwb->last_comp = rwb->last_issue = jiffies; rwb->queue = q; rwb->win_nsec = RWB_WINDOW_NSEC; From f680474345f1fd6329d1806d6358066fd3b07f02 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:40 +0800 Subject: [PATCH 075/406] blk-sysfs: remove NULL pointer checking in queue_wb_lat_store wbt_init doesn't set q->rq_wb to NULL, if wbt_init return 0, so check return value is enough, remove NULL checking. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e54be402899da..870484eaed1f6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -450,12 +450,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, ret = wbt_init(q); if (ret) return ret; - - rwb = q->rq_wb; - if (!rwb) - return -EINVAL; } + rwb = q->rq_wb; if (val == -1) rwb->min_lat_nsec = wbt_default_latency_nsec(q); else if (val >= 0) From 62d772fa9d86856d828cd24dd8fff5c83275c7e1 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:55 +0800 Subject: [PATCH 076/406] blk-wbt: move wbt_clear_stat to common place in wbt_done wbt_done call wbt_clear_stat no matter current stat was tracked or not, move it to common place. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index ec903532a5d1c..19faecc5f8f6f 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -178,12 +178,11 @@ void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) if (wbt_is_read(stat)) wb_timestamp(rwb, &rwb->last_comp); - wbt_clear_state(stat); } else { WARN_ON_ONCE(stat == rwb->sync_cookie); __wbt_done(rwb, wbt_stat_to_mask(stat)); - wbt_clear_state(stat); } + wbt_clear_state(stat); } /* From 3dfbdc44d69b2cd7e382fd084a5c860d2cea24f6 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:40:10 +0800 Subject: [PATCH 077/406] blk-wbt: fix comments typo Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 19faecc5f8f6f..ae8de9780085a 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -481,7 +481,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) /* * At this point we know it's a buffered write. If this is - * kswapd trying to free memory, or REQ_SYNC is set, set, then + * kswapd trying to free memory, or REQ_SYNC is set, then * it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing From 8e1fa89aa8bc2870009b4486644e4a58f2e2a4f5 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 10 Nov 2017 11:04:52 +0100 Subject: [PATCH 078/406] crypto: algif_aead - skip SGL entries with NULL page The TX SGL may contain SGL entries that are assigned a NULL page. This may happen if a multi-stage AIO operation is performed where the data for each stage is pointed to by one SGL entry. Upon completion of that stage, af_alg_pull_tsgl will assign NULL to the SGL entry. The NULL cipher used to copy the AAD from TX SGL to the destination buffer, however, cannot handle the case where the SGL starts with an SGL entry having a NULL page. Thus, the code needs to advance the start pointer into the SGL to the first non-NULL entry. This fixes a crash visible on Intel x86 32 bit using the libkcapi test suite. Cc: Fixes: 72548b093ee38 ("crypto: algif_aead - copy AAD from src to dst") Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index aacae0837aff7..e2068b78993c5 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -101,10 +101,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, struct aead_tfm *aeadc = pask->private; struct crypto_aead *tfm = aeadc->aead; struct crypto_skcipher *null_tfm = aeadc->null_tfm; - unsigned int as = crypto_aead_authsize(tfm); + unsigned int i, as = crypto_aead_authsize(tfm); struct af_alg_async_req *areq; - struct af_alg_tsgl *tsgl; - struct scatterlist *src; + struct af_alg_tsgl *tsgl, *tmp; + struct scatterlist *rsgl_src, *tsgl_src = NULL; int err = 0; size_t used = 0; /* [in] TX bufs to be en/decrypted */ size_t outlen = 0; /* [out] RX bufs produced by kernel */ @@ -178,7 +178,22 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, } processed = used + ctx->aead_assoclen; - tsgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list); + list_for_each_entry_safe(tsgl, tmp, &ctx->tsgl_list, list) { + for (i = 0; i < tsgl->cur; i++) { + struct scatterlist *process_sg = tsgl->sg + i; + + if (!(process_sg->length) || !sg_page(process_sg)) + continue; + tsgl_src = process_sg; + break; + } + if (tsgl_src) + break; + } + if (processed && !tsgl_src) { + err = -EFAULT; + goto free; + } /* * Copy of AAD from source to destination @@ -194,7 +209,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, */ /* Use the RX SGL as source (and destination) for crypto op. */ - src = areq->first_rsgl.sgl.sg; + rsgl_src = areq->first_rsgl.sgl.sg; if (ctx->enc) { /* @@ -207,7 +222,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, * v v * RX SGL: AAD || PT || Tag */ - err = crypto_aead_copy_sgl(null_tfm, tsgl->sg, + err = crypto_aead_copy_sgl(null_tfm, tsgl_src, areq->first_rsgl.sgl.sg, processed); if (err) goto free; @@ -225,7 +240,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, */ /* Copy AAD || CT to RX SGL buffer for in-place operation. */ - err = crypto_aead_copy_sgl(null_tfm, tsgl->sg, + err = crypto_aead_copy_sgl(null_tfm, tsgl_src, areq->first_rsgl.sgl.sg, outlen); if (err) goto free; @@ -257,11 +272,11 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, areq->tsgl); } else /* no RX SGL present (e.g. authentication only) */ - src = areq->tsgl; + rsgl_src = areq->tsgl; } /* Initialize the crypto operation */ - aead_request_set_crypt(&areq->cra_u.aead_req, src, + aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src, areq->first_rsgl.sgl.sg, used, ctx->iv); aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen); aead_request_set_tfm(&areq->cra_u.aead_req, tfm); From 7d2c3f54e6f646887d019faa45f35d6fe9fe82ce Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 10 Nov 2017 13:20:55 +0100 Subject: [PATCH 079/406] crypto: af_alg - remove locking in async callback The code paths protected by the socket-lock do not use or modify the socket in a non-atomic fashion. The actions pertaining the socket do not even need to be handled as an atomic operation. Thus, the socket-lock can be safely ignored. This fixes a bug regarding scheduling in atomic as the callback function may be invoked in interrupt context. In addition, the sock_hold is moved before the AIO encrypt/decrypt operation to ensure that the socket is always present. This avoids a tiny race window where the socket is unprotected and yet used by the AIO operation. Finally, the release of resources for a crypto operation is moved into a common function of af_alg_free_resources. Cc: Fixes: e870456d8e7c8 ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae43 ("crypto: algif_aead - overhaul memory management") Reported-by: Romain Izard Signed-off-by: Stephan Mueller Tested-by: Romain Izard Signed-off-by: Herbert Xu --- crypto/af_alg.c | 21 ++++++++++++++------- crypto/algif_aead.c | 23 ++++++++++++----------- crypto/algif_skcipher.c | 23 ++++++++++++----------- include/crypto/if_alg.h | 1 + 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 85cea9de324a4..358749c38894e 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1020,6 +1020,18 @@ ssize_t af_alg_sendpage(struct socket *sock, struct page *page, } EXPORT_SYMBOL_GPL(af_alg_sendpage); +/** + * af_alg_free_resources - release resources required for crypto request + */ +void af_alg_free_resources(struct af_alg_async_req *areq) +{ + struct sock *sk = areq->sk; + + af_alg_free_areq_sgls(areq); + sock_kfree_s(sk, areq, areq->areqlen); +} +EXPORT_SYMBOL_GPL(af_alg_free_resources); + /** * af_alg_async_cb - AIO callback handler * @@ -1036,18 +1048,13 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) struct kiocb *iocb = areq->iocb; unsigned int resultlen; - lock_sock(sk); - /* Buffer size written by crypto operation. */ resultlen = areq->outlen; - af_alg_free_areq_sgls(areq); - sock_kfree_s(sk, areq, areq->areqlen); - __sock_put(sk); + af_alg_free_resources(areq); + sock_put(sk); iocb->ki_complete(iocb, err ? err : resultlen, 0); - - release_sock(sk); } EXPORT_SYMBOL_GPL(af_alg_async_cb); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index e2068b78993c5..805f485ddf1be 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -283,12 +283,23 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) { /* AIO operation */ + sock_hold(sk); areq->iocb = msg->msg_iocb; aead_request_set_callback(&areq->cra_u.aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_async_cb, areq); err = ctx->enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) : crypto_aead_decrypt(&areq->cra_u.aead_req); + + /* AIO operation in progress */ + if (err == -EINPROGRESS || err == -EBUSY) { + /* Remember output size that will be generated. */ + areq->outlen = outlen; + + return -EIOCBQUEUED; + } + + sock_put(sk); } else { /* Synchronous operation */ aead_request_set_callback(&areq->cra_u.aead_req, @@ -300,19 +311,9 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, &ctx->wait); } - /* AIO operation in progress */ - if (err == -EINPROGRESS) { - sock_hold(sk); - - /* Remember output size that will be generated. */ - areq->outlen = outlen; - - return -EIOCBQUEUED; - } free: - af_alg_free_areq_sgls(areq); - sock_kfree_s(sk, areq, areq->areqlen); + af_alg_free_resources(areq); return err ? err : outlen; } diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 9954b078f0b9c..30cff827dd8ff 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -117,6 +117,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) { /* AIO operation */ + sock_hold(sk); areq->iocb = msg->msg_iocb; skcipher_request_set_callback(&areq->cra_u.skcipher_req, CRYPTO_TFM_REQ_MAY_SLEEP, @@ -124,6 +125,16 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, err = ctx->enc ? crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) : crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); + + /* AIO operation in progress */ + if (err == -EINPROGRESS || err == -EBUSY) { + /* Remember output size that will be generated. */ + areq->outlen = len; + + return -EIOCBQUEUED; + } + + sock_put(sk); } else { /* Synchronous operation */ skcipher_request_set_callback(&areq->cra_u.skcipher_req, @@ -136,19 +147,9 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, &ctx->wait); } - /* AIO operation in progress */ - if (err == -EINPROGRESS) { - sock_hold(sk); - - /* Remember output size that will be generated. */ - areq->outlen = len; - - return -EIOCBQUEUED; - } free: - af_alg_free_areq_sgls(areq); - sock_kfree_s(sk, areq, areq->areqlen); + af_alg_free_resources(areq); return err ? err : len; } diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 6abf0a3604dc3..38d9c5861ed8c 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -242,6 +242,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, unsigned int ivsize); ssize_t af_alg_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); +void af_alg_free_resources(struct af_alg_async_req *areq); void af_alg_async_cb(struct crypto_async_request *_req, int err); unsigned int af_alg_poll(struct file *file, struct socket *sock, poll_table *wait); From 2621e945fbf1d6df5f3f0ba7be5bae3d2cf9b6a5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 24 Nov 2017 14:51:02 +1100 Subject: [PATCH 080/406] powerpc/kexec: Fix kexec/kdump in P9 guest kernels The code that cleans up the IAMR/AMOR before kexec'ing failed to remember that when we're running as a guest AMOR is not writable, it's hypervisor privileged. They symptom is that the kexec stops before entering purgatory and nothing else is seen on the console. If you examine the state of the system all threads will be in the 0x700 program check handler. Fix it by making the write to AMOR dependent on HV mode. Fixes: 1e2a516e89fc ("powerpc/kexec: Fix radix to hash kexec due to IAMR/AMOR") Cc: stable@vger.kernel.org # v4.10+ Reported-by: Yilin Zhang Debugged-by: David Gibson Signed-off-by: Michael Ellerman Acked-by: Balbir Singh Reviewed-by: David Gibson Tested-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 8ac0bd2bddb0c..3280953a82cf6 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -623,7 +623,9 @@ BEGIN_FTR_SECTION * NOTE, we rely on r0 being 0 from above. */ mtspr SPRN_IAMR,r0 +BEGIN_FTR_SECTION_NESTED(42) mtspr SPRN_AMOR,r0 +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* save regs for local vars on new stack. From 26f4e759ef9b8a2bab1823d692ed6d56d40b66e3 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 23 Nov 2017 10:50:23 +0100 Subject: [PATCH 081/406] s390/disassembler: correct disassembly lines alignment 176.718956 Krnl Code: 00000000004d38b0: a54c0018 llihh %r4,24 176.718956 00000000004d38b4: b9080014 agr %r1,%r4 ^ Using a tab to align disassembly lines which follow the first line with "Krnl Code: " doesn't always work, e.g. if there is a prefix (timestamp or syslog prefix) which is not 8 chars aligned. Go back to alignment with spaces. Fixes: b192571d1ae3 ("s390/disassembler: increase show_code buffer size") Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 465d52b5e4708..0a0e14335a045 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -548,7 +548,7 @@ void show_code(struct pt_regs *regs) start += opsize; pr_cont("%s", buffer); ptr = buffer; - ptr += sprintf(ptr, "\n\t "); + ptr += sprintf(ptr, "\n "); hops++; } pr_cont("\n"); From 53c4ab70c11c3ba1b9e3caa8e8c17e9c16d9cbc0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 22 Nov 2017 17:19:32 +0100 Subject: [PATCH 082/406] s390: fix alloc_pgste check in init_new_context again git commit badb8bb983e9 "fix alloc_pgste check in init_new_context" fixed the problem of 'current->mm == NULL' in init_new_context back in 2011. git commit 3eabaee998c7 "KVM: s390: allow sie enablement for multi- threaded programs" completely removed the check against alloc_pgste. git commit 23fefe119ceb "s390/kvm: avoid global config of vm.alloc_pgste=1" re-added a check against the alloc_pgste flag but without the required check for current->mm != NULL. For execve() called by a kernel thread init_new_context() reads from ((struct mm_struct *) NULL)->context.alloc_pgste to decide between 2K vs 4K page tables. If the bit happens to be set for the init process it will be created with large page tables. This decision is inherited by all the children of init, this waste quite some memory. Re-add the check for 'current->mm != NULL'. Fixes: 23fefe119ceb ("s390/kvm: avoid global config of vm.alloc_pgste=1") Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index f4a07f788f78b..65154eaa3714a 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -28,7 +28,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || - current->mm->context.alloc_pgste; + (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.use_skey = 0; mm->context.use_cmma = 0; From 03a6c82218b9a87014b2c6c4e178294fdc8ebd8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: [PATCH 083/406] rxrpc: The mutex lock returned by rxrpc_accept_call() needs releasing The caller of rxrpc_accept_call() must release the lock on call->user_mutex returned by that function. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7d2595582c094..3a99b1a908df5 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -619,8 +619,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); - rxrpc_put_call(call, rxrpc_call_put); - return 0; + ret = 0; + goto out_put_unlock; } call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); @@ -689,6 +689,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len, NULL); } +out_put_unlock: mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put); From 48ca24636d5a26f1b7b8e69c54bccf19394843e5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: [PATCH 084/406] rxrpc: Don't set upgrade by default in sendmsg() Don't set upgrade by default when creating a call from sendmsg(). This is a holdover from when I was testing the code. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3a99b1a908df5..94555c94b2d86 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -602,7 +602,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, - .upgrade = true, + .upgrade = false, }; _enter(""); From 9faaff593404a9c4e5abc6839a641635d7b9d0cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: [PATCH 085/406] rxrpc: Provide a different lockdep key for call->user_mutex for kernel calls Provide a different lockdep key for rxrpc_call::user_mutex when the call is made on a kernel socket, such as by the AFS filesystem. The problem is that lockdep registers a false positive between userspace calling the sendmsg syscall on a user socket where call->user_mutex is held whilst userspace memory is accessed whereas the AFS filesystem may perform operations with mmap_sem held by the caller. In such a case, the following warning is produced. ====================================================== WARNING: possible circular locking dependency detected 4.14.0-fscache+ #243 Tainted: G E ------------------------------------------------------ modpost/16701 is trying to acquire lock: (&vnode->io_lock){+.+.}, at: [] afs_begin_vnode_operation+0x33/0x77 [kafs] but task is already holding lock: (&mm->mmap_sem){++++}, at: [] __do_page_fault+0x1ef/0x486 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++}: __might_fault+0x61/0x89 _copy_from_iter_full+0x40/0x1fa rxrpc_send_data+0x8dc/0xff3 rxrpc_do_sendmsg+0x62f/0x6a1 rxrpc_sendmsg+0x166/0x1b7 sock_sendmsg+0x2d/0x39 ___sys_sendmsg+0x1ad/0x22b __sys_sendmsg+0x41/0x62 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #2 (&call->user_mutex){+.+.}: __mutex_lock+0x86/0x7d2 rxrpc_new_client_call+0x378/0x80e rxrpc_kernel_begin_call+0xf3/0x154 afs_make_call+0x195/0x454 [kafs] afs_vl_get_capabilities+0x193/0x198 [kafs] afs_vl_lookup_vldb+0x5f/0x151 [kafs] afs_create_volume+0x2e/0x2f4 [kafs] afs_mount+0x56a/0x8d7 [kafs] mount_fs+0x6a/0x109 vfs_kern_mount+0x67/0x135 do_mount+0x90b/0xb57 SyS_mount+0x72/0x98 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #1 (k-sk_lock-AF_RXRPC){+.+.}: lock_sock_nested+0x74/0x8a rxrpc_kernel_begin_call+0x8a/0x154 afs_make_call+0x195/0x454 [kafs] afs_fs_get_capabilities+0x17a/0x17f [kafs] afs_probe_fileserver+0xf7/0x2f0 [kafs] afs_select_fileserver+0x83f/0x903 [kafs] afs_fetch_status+0x89/0x11d [kafs] afs_iget+0x16f/0x4f8 [kafs] afs_mount+0x6c6/0x8d7 [kafs] mount_fs+0x6a/0x109 vfs_kern_mount+0x67/0x135 do_mount+0x90b/0xb57 SyS_mount+0x72/0x98 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #0 (&vnode->io_lock){+.+.}: lock_acquire+0x174/0x19f __mutex_lock+0x86/0x7d2 afs_begin_vnode_operation+0x33/0x77 [kafs] afs_fetch_data+0x80/0x12a [kafs] afs_readpages+0x314/0x405 [kafs] __do_page_cache_readahead+0x203/0x2ba filemap_fault+0x179/0x54d __do_fault+0x17/0x60 __handle_mm_fault+0x6d7/0x95c handle_mm_fault+0x24e/0x2a3 __do_page_fault+0x301/0x486 do_page_fault+0x236/0x259 page_fault+0x22/0x30 __clear_user+0x3d/0x60 padzero+0x1c/0x2b load_elf_binary+0x785/0xdc7 search_binary_handler+0x81/0x1ff do_execveat_common.isra.14+0x600/0x888 do_execve+0x1f/0x21 SyS_execve+0x28/0x2f do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 other info that might help us debug this: Chain exists of: &vnode->io_lock --> &call->user_mutex --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&call->user_mutex); lock(&mm->mmap_sem); lock(&vnode->io_lock); *** DEADLOCK *** 1 lock held by modpost/16701: #0: (&mm->mmap_sem){++++}, at: [] __do_page_fault+0x1ef/0x486 stack backtrace: CPU: 0 PID: 16701 Comm: modpost Tainted: G E 4.14.0-fscache+ #243 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: dump_stack+0x67/0x8e print_circular_bug+0x341/0x34f check_prev_add+0x11f/0x5d4 ? add_lock_to_list.isra.12+0x8b/0x8b ? add_lock_to_list.isra.12+0x8b/0x8b ? __lock_acquire+0xf77/0x10b4 __lock_acquire+0xf77/0x10b4 lock_acquire+0x174/0x19f ? afs_begin_vnode_operation+0x33/0x77 [kafs] __mutex_lock+0x86/0x7d2 ? afs_begin_vnode_operation+0x33/0x77 [kafs] ? afs_begin_vnode_operation+0x33/0x77 [kafs] ? afs_begin_vnode_operation+0x33/0x77 [kafs] afs_begin_vnode_operation+0x33/0x77 [kafs] afs_fetch_data+0x80/0x12a [kafs] afs_readpages+0x314/0x405 [kafs] __do_page_cache_readahead+0x203/0x2ba ? filemap_fault+0x179/0x54d filemap_fault+0x179/0x54d __do_fault+0x17/0x60 __handle_mm_fault+0x6d7/0x95c handle_mm_fault+0x24e/0x2a3 __do_page_fault+0x301/0x486 do_page_fault+0x236/0x259 page_fault+0x22/0x30 RIP: 0010:__clear_user+0x3d/0x60 RSP: 0018:ffff880071e93da0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 000000000000011c RCX: 000000000000011c RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000000000060f720 RBP: 000000000060f720 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: ffff8800b5459b68 R12: ffff8800ce150e00 R13: 000000000060f720 R14: 00000000006127a8 R15: 0000000000000000 padzero+0x1c/0x2b load_elf_binary+0x785/0xdc7 search_binary_handler+0x81/0x1ff do_execveat_common.isra.14+0x600/0x888 do_execve+0x1f/0x21 SyS_execve+0x28/0x2f do_syscall_64+0x89/0x1be entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fdb6009ee07 RSP: 002b:00007fff566d9728 EFLAGS: 00000246 ORIG_RAX: 000000000000003b RAX: ffffffffffffffda RBX: 000055ba57280900 RCX: 00007fdb6009ee07 RDX: 000055ba5727f270 RSI: 000055ba5727cac0 RDI: 000055ba57280900 RBP: 000055ba57280900 R08: 00007fff566d9700 R09: 0000000000000000 R10: 000055ba5727cac0 R11: 0000000000000246 R12: 0000000000000000 R13: 000055ba5727cac0 R14: 000055ba5727f270 R15: 0000000000000000 Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_object.c | 19 +++++++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b2151993d384b..a972887b3f5dc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -672,7 +672,7 @@ extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); -struct rxrpc_call *rxrpc_alloc_call(gfp_t); +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cbd1701e813a7..3028298ca5613 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, /* Now it gets complicated, because calls get registered with the * socket here, particularly if a user ID is preassigned by the user. */ - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4c7fbc6dcce73..1f141dc08ad28 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -55,6 +55,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); } +static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; + /* * find an extant server call * - called in process context with IRQs enabled @@ -95,7 +97,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, /* * allocate a new call */ -struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) { struct rxrpc_call *call; @@ -114,6 +116,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) goto nomem_2; mutex_init(&call->user_mutex); + + /* Prevent lockdep reporting a deadlock false positive between the afs + * filesystem and sys_sendmsg() via the mmap sem. + */ + if (rx->sk.sk_kern_sock) + lockdep_set_class(&call->user_mutex, + &rxrpc_call_user_mutex_lock_class_key); + setup_timer(&call->timer, rxrpc_call_timer_expired, (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); @@ -151,7 +161,8 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, + struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; @@ -159,7 +170,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, _enter(""); - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; @@ -210,7 +221,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(srx, gfp); + call = rxrpc_alloc_client_call(rx, srx, gfp); if (IS_ERR(call)) { release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); From 3136ef49a14ccc148becf813074e08fc92fc9b23 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: [PATCH 086/406] rxrpc: Delay terminal ACK transmission on a client call Delay terminal ACK transmission on a client call by deferring it to the connection processor. This allows it to be skipped if we can send the next call instead, the first DATA packet of which will implicitly ack this call. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 17 ++++++++++ net/rxrpc/conn_client.c | 18 ++++++++++ net/rxrpc/conn_event.c | 74 +++++++++++++++++++++++++++++++++-------- net/rxrpc/conn_object.c | 10 ++++++ net/rxrpc/recvmsg.c | 2 ++ 5 files changed, 108 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a972887b3f5dc..d1213d503f30b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -338,8 +338,17 @@ enum rxrpc_conn_flag { RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ + RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */ + RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */ + RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */ + RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */ }; +#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \ + (1UL << RXRPC_CONN_FINAL_ACK_1) | \ + (1UL << RXRPC_CONN_FINAL_ACK_2) | \ + (1UL << RXRPC_CONN_FINAL_ACK_3)) + /* * Events that can be raised upon a connection. */ @@ -393,6 +402,7 @@ struct rxrpc_connection { #define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) struct list_head waiting_calls; /* Calls waiting for channels */ struct rxrpc_channel { + unsigned long final_ack_at; /* Time at which to issue final ACK */ struct rxrpc_call __rcu *call; /* Active call */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ @@ -404,6 +414,7 @@ struct rxrpc_connection { }; } channels[RXRPC_MAXCALLS]; + struct timer_list timer; /* Conn event timer */ struct work_struct processor; /* connection event processor */ union { struct rb_node client_node; /* Node in local->client_conns */ @@ -861,6 +872,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) rxrpc_put_service_conn(conn); } +static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, + unsigned long expire_at) +{ + timer_reduce(&conn->timer, expire_at); +} + /* * conn_service.c */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5f9624bd311c6..cfb997593da90 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + /* Cancel the final ACK on the previous call if it hasn't been sent yet + * as the DATA packet will implicitly ACK it. + */ + clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + write_lock_bh(&call->state_lock); if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; @@ -813,6 +818,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out_2; } + /* Schedule the final ACK to be transmitted in a short while so that it + * can be skipped if we find a follow-on call. The first DATA packet + * of the follow on call will implicitly ACK this call. + */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + unsigned long final_ack_at = jiffies + 2; + + WRITE_ONCE(chan->final_ack_at, final_ack_at); + smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */ + set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + rxrpc_reduce_conn_timer(conn, final_ack_at); + } + /* Things are more complex and we need the cache lock. We might be * able to simply idle the conn or it might now be lurking on the wait * list. It might even get moved back to the active list whilst we're diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 59a51a56e7c88..9e9a8db1bc9cd 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -24,9 +24,10 @@ * Retransmit terminal ACK or ABORT of the previous call. */ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int channel) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; struct msghdr msg; struct kvec iov; @@ -48,7 +49,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _enter("%d", conn->debug_id); - chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just * ignore this packet. @@ -56,7 +57,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, call_id = READ_ONCE(chan->last_call); /* Sync with __rxrpc_disconnect_call() */ smp_rmb(); - if (call_id != sp->hdr.callNumber) + if (skb && call_id != sp->hdr.callNumber) return; msg.msg_name = &conn->params.peer->srx.transport; @@ -65,9 +66,9 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, msg.msg_controllen = 0; msg.msg_flags = 0; - pkt.whdr.epoch = htonl(sp->hdr.epoch); - pkt.whdr.cid = htonl(sp->hdr.cid); - pkt.whdr.callNumber = htonl(sp->hdr.callNumber); + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(conn->proto.cid); + pkt.whdr.callNumber = htonl(call_id); pkt.whdr.seq = 0; pkt.whdr.type = chan->last_type; pkt.whdr.flags = conn->out_clientflag; @@ -87,11 +88,11 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, mtu = conn->params.peer->if_mtu; mtu -= conn->params.peer->hdrsize; pkt.ack.bufferSpace = 0; - pkt.ack.maxSkew = htons(skb->priority); - pkt.ack.firstPacket = htonl(chan->last_seq); - pkt.ack.previousPacket = htonl(chan->last_seq - 1); - pkt.ack.serial = htonl(sp->hdr.serial); - pkt.ack.reason = RXRPC_ACK_DUPLICATE; + pkt.ack.maxSkew = htons(skb ? skb->priority : 0); + pkt.ack.firstPacket = htonl(chan->last_seq + 1); + pkt.ack.previousPacket = htonl(chan->last_seq); + pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); + pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; pkt.info.rxMTU = htonl(rxrpc_rx_mtu); pkt.info.maxMTU = htonl(mtu); @@ -272,7 +273,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb); + rxrpc_conn_retransmit_call(conn, skb, + sp->hdr.cid & RXRPC_CHANNELMASK); return 0; case RXRPC_PACKET_TYPE_BUSY: @@ -378,6 +380,48 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) _leave(" [aborted]"); } +/* + * Process delayed final ACKs that we haven't subsumed into a subsequent call. + */ +static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn) +{ + unsigned long j = jiffies, next_j; + unsigned int channel; + bool set; + +again: + next_j = j + LONG_MAX; + set = false; + for (channel = 0; channel < RXRPC_MAXCALLS; channel++) { + struct rxrpc_channel *chan = &conn->channels[channel]; + unsigned long ack_at; + + if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) + continue; + + smp_rmb(); /* vs rxrpc_disconnect_client_call */ + ack_at = READ_ONCE(chan->final_ack_at); + + if (time_before(j, ack_at)) { + if (time_before(ack_at, next_j)) { + next_j = ack_at; + set = true; + } + continue; + } + + if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, + &conn->flags)) + rxrpc_conn_retransmit_call(conn, NULL, channel); + } + + j = jiffies; + if (time_before_eq(next_j, j)) + goto again; + if (set) + rxrpc_reduce_conn_timer(conn, next_j); +} + /* * connection-level event processor */ @@ -394,6 +438,10 @@ void rxrpc_process_connection(struct work_struct *work) if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); + /* Process delayed ACKs whose time has come. */ + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn); + /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index fe575798592fe..a01a3791f06ac 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -24,6 +24,14 @@ unsigned int rxrpc_connection_expiry = 10 * 60; static void rxrpc_destroy_connection(struct rcu_head *); +static void rxrpc_connection_timer(struct timer_list *timer) +{ + struct rxrpc_connection *conn = + container_of(timer, struct rxrpc_connection, timer); + + rxrpc_queue_conn(conn); +} + /* * allocate a new connection */ @@ -38,6 +46,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) INIT_LIST_HEAD(&conn->cache_link); spin_lock_init(&conn->channel_lock); INIT_LIST_HEAD(&conn->waiting_calls); + timer_setup(&conn->timer, &rxrpc_connection_timer, 0); INIT_WORK(&conn->processor, &rxrpc_process_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); @@ -332,6 +341,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) _net("DESTROY CONN %d", conn->debug_id); + del_timer_sync(&conn->timer); rxrpc_purge_queue(&conn->rx_queue); conn->security->clear(conn); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8510a98b87e1e..be0b9ae138933 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); +#if 0 // TODO: May want to transmit final ACK under some circumstances anyway if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); rxrpc_send_ack_packet(call, false); } +#endif write_lock_bh(&call->state_lock); From 4812417894770f8c13e5dd8a66479ae44f4b01ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: [PATCH 087/406] rxrpc: Split the call params from the operation params When rxrpc_sendmsg() parses the control message buffer, it places the parameters extracted into a structure, but lumps together call parameters (such as user call ID) with operation parameters (such as whether to send data, send an abort or accept a call). Split the call parameters out into their own structure, a copy of which is then embedded in the operation parameters struct. The call parameters struct is then passed down into the places that need it instead of passing the individual parameters. This allows for extra call parameters to be added. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 8 +++++-- net/rxrpc/ar-internal.h | 31 ++++++++++++++++++++++++- net/rxrpc/call_object.c | 15 ++++++------ net/rxrpc/sendmsg.c | 51 ++++++++++++++--------------------------- 4 files changed, 60 insertions(+), 45 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9b5c46b052fd0..c0cdcf980ffc3 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, bool upgrade) { struct rxrpc_conn_parameters cp; + struct rxrpc_call_params p; struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); int ret; @@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, if (key && !key->payload.data[0]) key = NULL; /* a no-security key */ + memset(&p, 0, sizeof(p)); + p.user_call_ID = user_call_ID; + p.tx_total_len = tx_total_len; + memset(&cp, 0, sizeof(cp)); cp.local = rx->local; cp.key = key; @@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.upgrade = upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, - gfp); + call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp); /* The socket has been unlocked. */ if (!IS_ERR(call)) { call->notify_rx = notify_rx; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d1213d503f30b..ba63f22311073 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -643,6 +643,35 @@ struct rxrpc_ack_summary { u8 cumulative_acks; }; +/* + * sendmsg() cmsg-specified parameters. + */ +enum rxrpc_command { + RXRPC_CMD_SEND_DATA, /* send data message */ + RXRPC_CMD_SEND_ABORT, /* request abort generation */ + RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ + RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ +}; + +struct rxrpc_call_params { + s64 tx_total_len; /* Total Tx data length (if send data) */ + unsigned long user_call_ID; /* User's call ID */ + struct { + u32 hard; /* Maximum lifetime (sec) */ + u32 idle; /* Max time since last data packet (msec) */ + u32 normal; /* Max time since last call packet (msec) */ + } timeouts; + u8 nr_timeouts; /* Number of timeouts specified */ +}; + +struct rxrpc_send_params { + struct rxrpc_call_params call; + u32 abort_code; /* Abort code to Tx (if abort) */ + enum rxrpc_command command : 8; /* The command to implement */ + bool exclusive; /* Shared or exclusive call */ + bool upgrade; /* If the connection is upgradeable */ +}; + #include /* @@ -687,7 +716,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - unsigned long, s64, gfp_t); + struct rxrpc_call_params *, gfp_t); int rxrpc_retry_client_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 1f141dc08ad28..c3e1fa8544713 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -208,8 +208,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, - unsigned long user_call_ID, - s64 tx_total_len, + struct rxrpc_call_params *p, gfp_t gfp) __releases(&rx->sk.sk_lock.slock) { @@ -219,7 +218,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, const void *here = __builtin_return_address(0); int ret; - _enter("%p,%lx", rx, user_call_ID); + _enter("%p,%lx", rx, p->user_call_ID); call = rxrpc_alloc_client_call(rx, srx, gfp); if (IS_ERR(call)) { @@ -228,9 +227,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - call->tx_total_len = tx_total_len; + call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), - here, (const void *)user_call_ID); + here, (const void *)p->user_call_ID); /* We need to protect a partially set up call against the user as we * will be acting outside the socket lock. @@ -246,16 +245,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < xcall->user_call_ID) + if (p->user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; - else if (user_call_ID > xcall->user_call_ID) + else if (p->user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto error_dup_user_ID; } rcu_assign_pointer(call->socket, rx); - call->user_call_ID = user_call_ID; + call->user_call_ID = p->user_call_ID; __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 94555c94b2d86..de5ab327c18ab 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -21,22 +21,6 @@ #include #include "ar-internal.h" -enum rxrpc_command { - RXRPC_CMD_SEND_DATA, /* send data message */ - RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ - RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ -}; - -struct rxrpc_send_params { - s64 tx_total_len; /* Total Tx data length (if send data) */ - unsigned long user_call_ID; /* User's call ID */ - u32 abort_code; /* Abort code to Tx (if abort) */ - enum rxrpc_command command : 8; /* The command to implement */ - bool exclusive; /* Shared or exclusive call */ - bool upgrade; /* If the connection is upgradeable */ -}; - /* * Wait for space to appear in the Tx queue or a signal to occur. */ @@ -480,11 +464,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) if (msg->msg_flags & MSG_CMSG_COMPAT) { if (len != sizeof(u32)) return -EINVAL; - p->user_call_ID = *(u32 *)CMSG_DATA(cmsg); + p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg); } else { if (len != sizeof(unsigned long)) return -EINVAL; - p->user_call_ID = *(unsigned long *) + p->call.user_call_ID = *(unsigned long *) CMSG_DATA(cmsg); } got_user_ID = true; @@ -522,10 +506,10 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) break; case RXRPC_TX_LENGTH: - if (p->tx_total_len != -1 || len != sizeof(__s64)) + if (p->call.tx_total_len != -1 || len != sizeof(__s64)) return -EINVAL; - p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg); - if (p->tx_total_len < 0) + p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg); + if (p->call.tx_total_len < 0) return -EINVAL; break; @@ -536,7 +520,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) if (!got_user_ID) return -EINVAL; - if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) + if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; _leave(" = 0"); return 0; @@ -576,8 +560,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, - p->tx_total_len, GFP_KERNEL); + call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL); /* The socket is now unlocked */ _leave(" = %p\n", call); @@ -597,12 +580,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) int ret; struct rxrpc_send_params p = { - .tx_total_len = -1, - .user_call_ID = 0, - .abort_code = 0, - .command = RXRPC_CMD_SEND_DATA, - .exclusive = false, - .upgrade = false, + .call.tx_total_len = -1, + .call.user_call_ID = 0, + .abort_code = 0, + .command = RXRPC_CMD_SEND_DATA, + .exclusive = false, + .upgrade = false, }; _enter(""); @@ -615,7 +598,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, p.user_call_ID, NULL); + call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL); /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); @@ -623,7 +606,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) goto out_put_unlock; } - call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); + call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); if (!call) { ret = -EBADSLT; if (p.command != RXRPC_CMD_SEND_DATA) @@ -653,13 +636,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) goto error_put; } - if (p.tx_total_len != -1) { + if (p.call.tx_total_len != -1) { ret = -EINVAL; if (call->tx_total_len != -1 || call->tx_pending || call->tx_top != 0) goto error_put; - call->tx_total_len = p.tx_total_len; + call->tx_total_len = p.call.tx_total_len; } } From a158bdd3247b9656df36ba133235fff702e9fdc3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: [PATCH 088/406] rxrpc: Fix call timeouts Fix the rxrpc call expiration timeouts and make them settable from userspace. By analogy with other rx implementations, there should be three timeouts: (1) "Normal timeout" This is set for all calls and is triggered if we haven't received any packets from the peer in a while. It is measured from the last time we received any packet on that call. This is not reset by any connection packets (such as CHALLENGE/RESPONSE packets). If a service operation takes a long time, the server should generate PING ACKs at a duration that's substantially less than the normal timeout so is to keep both sides alive. This is set at 1/6 of normal timeout. (2) "Idle timeout" This is set only for a service call and is triggered if we stop receiving the DATA packets that comprise the request data. It is measured from the last time we received a DATA packet. (3) "Hard timeout" This can be set for a call and specified the maximum lifetime of that call. It should not be specified by default. Some operations (such as volume transfer) take a long time. Allow userspace to set/change the timeouts on a call with sendmsg, using a control message: RXRPC_SET_CALL_TIMEOUTS The data to the message is a number of 32-bit words, not all of which need be given: u32 hard_timeout; /* sec from first packet */ u32 idle_timeout; /* msec from packet Rx */ u32 normal_timeout; /* msec from data Rx */ This can be set in combination with any other sendmsg() that affects a call. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 69 +++++++++----- include/uapi/linux/rxrpc.h | 1 + net/rxrpc/ar-internal.h | 37 +++++--- net/rxrpc/call_event.c | 179 ++++++++++++++++------------------- net/rxrpc/call_object.c | 27 +++--- net/rxrpc/conn_client.c | 4 +- net/rxrpc/input.c | 34 ++++++- net/rxrpc/misc.c | 19 ++-- net/rxrpc/recvmsg.c | 2 +- net/rxrpc/sendmsg.c | 59 ++++++++++-- net/rxrpc/sysctl.c | 60 ++++++------ 11 files changed, 290 insertions(+), 201 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ebe96796027ae..01dcbc2164b59 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -138,10 +138,20 @@ enum rxrpc_rtt_rx_trace { enum rxrpc_timer_trace { rxrpc_timer_begin, + rxrpc_timer_exp_ack, + rxrpc_timer_exp_hard, + rxrpc_timer_exp_idle, + rxrpc_timer_exp_normal, + rxrpc_timer_exp_ping, + rxrpc_timer_exp_resend, rxrpc_timer_expired, rxrpc_timer_init_for_reply, rxrpc_timer_init_for_send_reply, + rxrpc_timer_restart, rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_hard, + rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, rxrpc_timer_set_for_send, @@ -296,12 +306,22 @@ enum rxrpc_congest_change { #define rxrpc_timer_traces \ EM(rxrpc_timer_begin, "Begin ") \ EM(rxrpc_timer_expired, "*EXPR*") \ + EM(rxrpc_timer_exp_ack, "ExpAck") \ + EM(rxrpc_timer_exp_hard, "ExpHrd") \ + EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_normal, "ExpNml") \ + EM(rxrpc_timer_exp_ping, "ExpPng") \ + EM(rxrpc_timer_exp_resend, "ExpRsn") \ EM(rxrpc_timer_init_for_reply, "IniRpl") \ EM(rxrpc_timer_init_for_send_reply, "SndRpl") \ + EM(rxrpc_timer_restart, "Restrt") \ EM(rxrpc_timer_set_for_ack, "SetAck") \ + EM(rxrpc_timer_set_for_hard, "SetHrd") \ + EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ EM(rxrpc_timer_set_for_resend, "SetRTx") \ - E_(rxrpc_timer_set_for_send, "SetTx ") + E_(rxrpc_timer_set_for_send, "SetSnd") #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ @@ -932,39 +952,44 @@ TRACE_EVENT(rxrpc_rtt_rx, TRACE_EVENT(rxrpc_timer, TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now, unsigned long now_j), + unsigned long now), - TP_ARGS(call, why, now, now_j), + TP_ARGS(call, why, now), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(enum rxrpc_timer_trace, why ) - __field_struct(ktime_t, now ) - __field_struct(ktime_t, expire_at ) - __field_struct(ktime_t, ack_at ) - __field_struct(ktime_t, resend_at ) - __field(unsigned long, now_j ) - __field(unsigned long, timer ) + __field(long, now ) + __field(long, ack_at ) + __field(long, resend_at ) + __field(long, ping_at ) + __field(long, expect_rx_by ) + __field(long, expect_req_by ) + __field(long, expect_term_by ) + __field(long, timer ) ), TP_fast_assign( - __entry->call = call; - __entry->why = why; - __entry->now = now; - __entry->expire_at = call->expire_at; - __entry->ack_at = call->ack_at; - __entry->resend_at = call->resend_at; - __entry->now_j = now_j; - __entry->timer = call->timer.expires; + __entry->call = call; + __entry->why = why; + __entry->now = now; + __entry->ack_at = call->ack_at; + __entry->resend_at = call->resend_at; + __entry->expect_rx_by = call->expect_rx_by; + __entry->expect_req_by = call->expect_req_by; + __entry->expect_term_by = call->expect_term_by; + __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s x=%lld a=%lld r=%lld t=%ld", + TP_printk("c=%p %s a=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), - ktime_to_ns(ktime_sub(__entry->expire_at, __entry->now)), - ktime_to_ns(ktime_sub(__entry->ack_at, __entry->now)), - ktime_to_ns(ktime_sub(__entry->resend_at, __entry->now)), - __entry->timer - __entry->now_j) + __entry->ack_at - __entry->now, + __entry->resend_at - __entry->now, + __entry->expect_rx_by - __entry->now, + __entry->expect_req_by - __entry->now, + __entry->expect_term_by - __entry->now, + __entry->timer - __entry->now) ); TRACE_EVENT(rxrpc_rx_lose, diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 9d4afea308a43..9335d92c14a40 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -59,6 +59,7 @@ enum rxrpc_cmsg_type { RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ + RXRPC_SET_CALL_TIMEOUT = 13, /* s-: Set one or more call timeouts */ RXRPC__SUPPORTED }; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ba63f22311073..548411371048e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -468,9 +468,9 @@ enum rxrpc_call_flag { enum rxrpc_call_event { RXRPC_CALL_EV_ACK, /* need to generate ACK */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ RXRPC_CALL_EV_PING, /* Ping send required */ + RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ }; /* @@ -514,10 +514,14 @@ struct rxrpc_call { struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ struct mutex user_mutex; /* User access mutex */ - ktime_t ack_at; /* When deferred ACK needs to happen */ - ktime_t resend_at; /* When next resend needs to happen */ - ktime_t ping_at; /* When next to send a ping */ - ktime_t expire_at; /* When the call times out */ + unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long resend_at; /* When next resend needs to happen */ + unsigned long ping_at; /* When next to send a ping */ + unsigned long expect_rx_by; /* When we expect to get a packet by */ + unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ + unsigned long expect_term_by; /* When we expect call termination by */ + u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ + u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@ -697,12 +701,19 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); -void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); +static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why) +{ + trace_rxrpc_timer(call, why, now); + timer_reduce(&call->timer, expire_at); +} + /* * call_object.c */ @@ -843,8 +854,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, */ extern unsigned int rxrpc_max_client_connections; extern unsigned int rxrpc_reap_client_connections; -extern unsigned int rxrpc_conn_idle_client_expiry; -extern unsigned int rxrpc_conn_idle_client_fast_expiry; +extern unsigned long rxrpc_conn_idle_client_expiry; +extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); @@ -976,13 +987,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local) * misc.c */ extern unsigned int rxrpc_max_backlog __read_mostly; -extern unsigned int rxrpc_requested_ack_delay; -extern unsigned int rxrpc_soft_ack_delay; -extern unsigned int rxrpc_idle_ack_delay; +extern unsigned long rxrpc_requested_ack_delay; +extern unsigned long rxrpc_soft_ack_delay; +extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern unsigned int rxrpc_resend_timeout; +extern unsigned long rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3574508baf9aa..c14395d5ad8c8 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -21,80 +21,6 @@ #include #include "ar-internal.h" -/* - * Set the timer - */ -void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) -{ - unsigned long t_j, now_j = jiffies; - ktime_t t; - bool queue = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - t = call->expire_at; - if (!ktime_after(t, now)) { - trace_rxrpc_timer(call, why, now, now_j); - queue = true; - goto out; - } - - if (!ktime_after(call->resend_at, now)) { - call->resend_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - queue = true; - } else if (ktime_before(call->resend_at, t)) { - t = call->resend_at; - } - - if (!ktime_after(call->ack_at, now)) { - call->ack_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - queue = true; - } else if (ktime_before(call->ack_at, t)) { - t = call->ack_at; - } - - if (!ktime_after(call->ping_at, now)) { - call->ping_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) - queue = true; - } else if (ktime_before(call->ping_at, t)) { - t = call->ping_at; - } - - t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); - t_j += jiffies; - - /* We have to make sure that the calculated jiffies value falls - * at or after the nsec value, or we may loop ceaselessly - * because the timer times out, but we haven't reached the nsec - * timeout yet. - */ - t_j++; - - if (call->timer.expires != t_j || !timer_pending(&call->timer)) { - mod_timer(&call->timer, t_j); - trace_rxrpc_timer(call, why, now, now_j); - } - } - -out: - if (queue) - rxrpc_queue_call(call); -} - -/* - * Set the timer - */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) -{ - read_lock_bh(&call->state_lock); - __rxrpc_set_timer(call, why, now); - read_unlock_bh(&call->state_lock); -} - /* * Propose a PING ACK be sent. */ @@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call, !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) rxrpc_queue_call(call); } else { - ktime_t now = ktime_get_real(); - ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); + unsigned long now = jiffies; + unsigned long ping_at = now + rxrpc_idle_ack_delay; - if (ktime_before(ping_at, call->ping_at)) { - call->ping_at = ping_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); + if (time_before(ping_at, call->ping_at)) { + WRITE_ONCE(call->ping_at, ping_at); + rxrpc_reduce_call_timer(call, ping_at, now, + rxrpc_timer_set_for_ping); } } } @@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned int expiry = rxrpc_soft_ack_delay; - ktime_t now, ack_at; + unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; /* Pings are handled specially because we don't want to accidentally @@ -190,11 +116,12 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - now = ktime_get_real(); - ack_at = ktime_add_ms(now, expiry); - if (ktime_before(ack_at, call->ack_at)) { - call->ack_at = ack_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); + now = jiffies; + ack_at = jiffies + expiry; + if (time_before(ack_at, call->ack_at)) { + WRITE_ONCE(call->ack_at, ack_at); + rxrpc_reduce_call_timer(call, ack_at, now, + rxrpc_timer_set_for_ack); } } @@ -227,18 +154,20 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. */ -static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) +static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; - ktime_t max_age, oldest, ack_ts; + ktime_t now, max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - max_age = ktime_sub_ms(now, rxrpc_resend_timeout); + now = ktime_get_real(); + max_age = ktime_sub_ms(now, rxrpc_resend_timeout * 1000 / HZ); spin_lock_bh(&call->lock); @@ -282,7 +211,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); + resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now))); + resend_at += jiffies + rxrpc_resend_timeout; + WRITE_ONCE(call->resend_at, resend_at); if (unacked) rxrpc_congestion_timeout(call); @@ -292,7 +223,8 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) * retransmitting data. */ if (!retrans) { - rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_resend); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) @@ -364,7 +296,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - ktime_t now; + unsigned long now, next, t; rxrpc_see_call(call); @@ -384,8 +316,50 @@ void rxrpc_process_call(struct work_struct *work) goto out_put; } - now = ktime_get_real(); - if (ktime_before(call->expire_at, now)) { + /* Work out if any timeouts tripped */ + now = jiffies; + t = READ_ONCE(call->expect_rx_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_req_by); + if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && + time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_term_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->ack_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); + cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK, &call->events); + } + + t = READ_ONCE(call->ping_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); + cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + + t = READ_ONCE(call->resend_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); + cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_RESEND, &call->events); + } + + /* Process events */ + if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; @@ -408,7 +382,22 @@ void rxrpc_process_call(struct work_struct *work) goto recheck_state; } - rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); + /* Make sure the timer is restarted */ + next = call->expect_rx_by; + +#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } + + set(call->expect_req_by); + set(call->expect_term_by); + set(call->ack_at); + set(call->resend_at); + set(call->ping_at); + + now = jiffies; + if (time_after_eq(now, next)) + goto recheck_state; + + rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c3e1fa8544713..b305970a9b63a 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -51,8 +51,10 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); + if (call->state < RXRPC_CALL_COMPLETE) { + trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); + rxrpc_queue_call(call); + } } static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; @@ -139,6 +141,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); call->tx_total_len = -1; + call->next_rx_timo = 20 * HZ; + call->next_req_timo = 1 * HZ; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); @@ -189,15 +193,16 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, */ static void rxrpc_start_call_timer(struct rxrpc_call *call) { - ktime_t now = ktime_get_real(), expire_at; - - expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); - call->expire_at = expire_at; - call->ack_at = expire_at; - call->ping_at = expire_at; - call->resend_at = expire_at; - call->timer.expires = jiffies + LONG_MAX / 2; - rxrpc_set_timer(call, rxrpc_timer_begin, now); + unsigned long now = jiffies; + unsigned long j = now + MAX_JIFFY_OFFSET; + + call->ack_at = j; + call->resend_at = j; + call->ping_at = j; + call->expect_rx_by = j; + call->expect_req_by = j; + call->expect_term_by = j; + call->timer.expires = now; } /* diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index cfb997593da90..97f6a8de4845d 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -85,8 +85,8 @@ __read_mostly unsigned int rxrpc_max_client_connections = 1000; __read_mostly unsigned int rxrpc_reap_client_connections = 900; -__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; -__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; +__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; /* * We use machine-unique IDs for our client connections. diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1b592073ec960..c89647eae86db 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -318,16 +318,18 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, static bool rxrpc_receiving_reply(struct rxrpc_call *call) { struct rxrpc_ack_summary summary = { 0 }; + unsigned long now, timo; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { spin_lock_bh(&call->lock); call->ackr_reason = 0; - call->resend_at = call->expire_at; - call->ack_at = call->expire_at; spin_unlock_bh(&call->lock); - rxrpc_set_timer(call, rxrpc_timer_init_for_reply, - ktime_get_real()); + now = jiffies; + timo = now + MAX_JIFFY_OFFSET; + WRITE_ONCE(call->resend_at, timo); + WRITE_ONCE(call->ack_at, timo); + trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) @@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, if (state >= RXRPC_CALL_COMPLETE) return; + if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + unsigned long timo = READ_ONCE(call->next_req_timo); + unsigned long now, expect_req_by; + + if (timo) { + now = jiffies; + expect_req_by = now + timo; + WRITE_ONCE(call->expect_req_by, expect_req_by); + rxrpc_reduce_call_timer(call, expect_req_by, now, + rxrpc_timer_set_for_idle); + } + } + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ @@ -908,9 +923,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long timo; _enter("%p,%p", call, skb); + timo = READ_ONCE(call->next_rx_timo); + if (timo) { + unsigned long now = jiffies, expect_rx_by; + + expect_rx_by = jiffies + timo; + WRITE_ONCE(call->expect_rx_by, expect_rx_by); + rxrpc_reduce_call_timer(call, expect_rx_by, now, + rxrpc_timer_set_for_normal); + } + switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb, skew); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1a2d4b1120649..c1d9e7fd7448d 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -20,34 +20,29 @@ */ unsigned int rxrpc_max_backlog __read_mostly = 10; -/* - * Maximum lifetime of a call (in mx). - */ -unsigned int rxrpc_max_call_lifetime = 60 * 1000; - /* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in ms). + * packet with RXRPC_REQUEST_ACK set (in jiffies). */ -unsigned int rxrpc_requested_ack_delay = 1; +unsigned long rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in ms). + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * 1000; +unsigned long rxrpc_soft_ack_delay = HZ; /* - * How long to wait before scheduling an ACK with subtype IDLE (in ms). + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; +unsigned long rxrpc_idle_ack_delay = HZ / 2; /* * Receive window size in packets. This indicates the maximum number of @@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4; /* * Time till packet resend (in milliseconds). */ -unsigned int rxrpc_resend_timeout = 4 * 1000; +unsigned long rxrpc_resend_timeout = 4 * HZ; const s8 rxrpc_ack_priority[] = { [0] = 0, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index be0b9ae138933..0b6609da80b75 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -163,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) case RXRPC_CALL_SERVER_RECV_REQUEST: call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - call->ack_at = call->expire_at; + call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; write_unlock_bh(&call->state_lock); rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, rxrpc_propose_ack_processing_op); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index de5ab327c18ab..03e0676db28c1 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -158,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long now; rxrpc_seq_t seq = sp->hdr.seq; int ret, ix; u8 annotation = RXRPC_TX_ANNO_UNACK; @@ -197,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, break; case RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; - call->ack_at = call->expire_at; + now = jiffies; + WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET); if (call->ackr_reason == RXRPC_ACK_DELAY) call->ackr_reason = 0; - __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, - ktime_get_real()); + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); if (!last) break; /* Fall through */ @@ -223,14 +224,12 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - ktime_t now = ktime_get_real(), resend_at; + unsigned long now = jiffies, resend_at; - resend_at = ktime_add_ms(now, rxrpc_resend_timeout); - - if (ktime_before(resend_at, call->resend_at)) { - call->resend_at = resend_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); - } + resend_at = now + rxrpc_resend_timeout; + WRITE_ONCE(call->resend_at, resend_at); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_send); } rxrpc_free_skb(skb, rxrpc_skb_tx_freed); @@ -513,6 +512,19 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) return -EINVAL; break; + case RXRPC_SET_CALL_TIMEOUT: + if (len & 3 || len < 4 || len > 12) + return -EINVAL; + memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len); + p->call.nr_timeouts = len / 4; + if (p->call.timeouts.hard > INT_MAX / HZ) + return -ERANGE; + if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000) + return -ERANGE; + if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000) + return -ERANGE; + break; + default: return -EINVAL; } @@ -577,11 +589,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) { enum rxrpc_call_state state; struct rxrpc_call *call; + unsigned long now, j; int ret; struct rxrpc_send_params p = { .call.tx_total_len = -1, .call.user_call_ID = 0, + .call.nr_timeouts = 0, .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, @@ -646,6 +660,31 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } } + switch (p.call.nr_timeouts) { + case 3: + j = msecs_to_jiffies(p.call.timeouts.normal); + if (p.call.timeouts.normal > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_rx_timo, j); + /* Fall through */ + case 2: + j = msecs_to_jiffies(p.call.timeouts.idle); + if (p.call.timeouts.idle > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_req_timo, j); + /* Fall through */ + case 1: + if (p.call.timeouts.hard > 0) { + j = msecs_to_jiffies(p.call.timeouts.hard); + now = jiffies; + j += now; + WRITE_ONCE(call->expect_term_by, j); + rxrpc_reduce_call_timer(call, j, now, + rxrpc_timer_set_for_hard); + } + break; + } + state = READ_ONCE(call->state); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, state, call->conn); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 34c706d2f79c6..4a7af7aff37d2 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -21,6 +21,8 @@ static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; +static const unsigned long one_jiffy = 1; +static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; /* * RxRPC operating parameters. @@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; * information on the individual parameters. */ static struct ctl_table rxrpc_sysctl_table[] = { - /* Values measured in milliseconds */ + /* Values measured in milliseconds but used in jiffies */ { .procname = "req_ack_delay", .data = &rxrpc_requested_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&zero, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "soft_ack_delay", .data = &rxrpc_soft_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_ack_delay", .data = &rxrpc_idle_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, - }, - { - .procname = "resend_timeout", - .data = &rxrpc_resend_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_conn_expiry", .data = &rxrpc_conn_idle_client_expiry, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_conn_fast_expiry", .data = &rxrpc_conn_idle_client_fast_expiry, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, - - /* Values measured in seconds but used in jiffies */ { - .procname = "max_call_lifetime", - .data = &rxrpc_max_call_lifetime, - .maxlen = sizeof(unsigned int), + .procname = "resend_timeout", + .data = &rxrpc_resend_timeout, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, /* Non-time values */ From 8637abaa72fe8200a4a37579e6ec5273db85d2c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: [PATCH 089/406] rxrpc: Don't transmit DELAY ACKs immediately on proposal Don't transmit a DELAY ACK immediately on proposal when the Rx window is rotated, but rather defer it to the work function. This means that we have a chance to queue/consume more received packets before we actually send the DELAY ACK, or even cancel it entirely, thereby reducing the number of packets transmitted. We do, however, want to continue sending other types of packet immediately, particularly REQUESTED ACKs, as they may be used for RTT calculation by the other side. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 0b6609da80b75..fad5f42a3abd3 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -219,9 +219,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) after_eq(top, call->ackr_seen + 2) || (hard_ack == top && after(hard_ack, call->ackr_consumed))) rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, - true, false, + true, true, rxrpc_propose_ack_rotate_rx); - if (call->ackr_reason) + if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) rxrpc_send_ack_packet(call, false); } } From beb8e5e4f38cc3e4c2839cfc143e0312bf53d0e0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: [PATCH 090/406] rxrpc: Express protocol timeouts in terms of RTT Express protocol timeouts for data retransmission and deferred ack generation in terms on RTT rather than specified timeouts once we have sufficient RTT samples. For the moment, this requires just one RTT sample to be able to use this for ack deferral and two for data retransmission. The data retransmission timeout is set at RTT*1.5 and the ACK deferral timeout is set at RTT. Note that the calculated timeout is limited to a minimum of 4ns to make sure it doesn't happen too quickly. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 22 ++++++++++++++++++---- net/rxrpc/sendmsg.c | 7 +++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c14395d5ad8c8..da91f16ac77c5 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -52,7 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; + unsigned long expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; /* Pings are handled specially because we don't want to accidentally @@ -116,7 +116,13 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - now = jiffies; + unsigned long now = jiffies, ack_at; + + if (call->peer->rtt_usage > 0) + ack_at = nsecs_to_jiffies(call->peer->rtt); + else + ack_at = expiry; + ack_at = jiffies + expiry; if (time_before(ack_at, call->ack_at)) { WRITE_ONCE(call->ack_at, ack_at); @@ -160,14 +166,22 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) struct sk_buff *skb; unsigned long resend_at; rxrpc_seq_t cursor, seq, top; - ktime_t now, max_age, oldest, ack_ts; + ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + if (call->peer->rtt_usage > 1) + timeout = ns_to_ktime(call->peer->rtt * 3 / 2); + else + timeout = ms_to_ktime(rxrpc_resend_timeout); + min_timeo = ns_to_ktime((1000000000 / HZ) * 4); + if (ktime_before(timeout, min_timeo)) + timeout = min_timeo; + now = ktime_get_real(); - max_age = ktime_sub_ms(now, rxrpc_resend_timeout * 1000 / HZ); + max_age = ktime_sub(now, timeout); spin_lock_bh(&call->lock); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 03e0676db28c1..c56ee54fdd1f0 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -226,6 +226,13 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, } else { unsigned long now = jiffies, resend_at; + if (call->peer->rtt_usage > 1) + resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); + else + resend_at = rxrpc_resend_timeout; + if (resend_at < 1) + resend_at = 1; + resend_at = now + rxrpc_resend_timeout; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, From bd1fdf8cfdf3fdbccd2b21c33ec649ebd7429af7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: [PATCH 091/406] rxrpc: Add a timeout for detecting lost ACKs/lost DATA Add an extra timeout that is set/updated when we send a DATA packet that has the request-ack flag set. This allows us to detect if we don't get an ACK in response to the latest flagged packet. The ACK packet is adjudged to have been lost if it doesn't turn up within 2*RTT of the transmission. If the timeout occurs, we schedule the sending of a PING ACK to find out the state of the other side. If a new DATA packet is ready to go sooner, we cancel the sending of the ping and set the request-ack flag on that instead. If we get back a PING-RESPONSE ACK that indicates a lower tx_top than what we had at the time of the ping transmission, we adjudge all the DATA packets sent between the response tx_top and the ping-time tx_top to have been lost and retransmit immediately. Rather than sending a PING ACK, we could just pick a DATA packet and speculatively retransmit that with request-ack set. It should result in either a REQUESTED ACK or a DUPLICATE ACK which we can then use in lieu the a PING-RESPONSE ACK mentioned above. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 11 ++++++++-- net/rxrpc/ar-internal.h | 6 +++++- net/rxrpc/call_event.c | 26 +++++++++++++++++++---- net/rxrpc/call_object.c | 1 + net/rxrpc/input.c | 40 ++++++++++++++++++++++++++++++++++++ net/rxrpc/output.c | 20 ++++++++++++++++-- net/rxrpc/recvmsg.c | 4 ++-- net/rxrpc/sendmsg.c | 2 +- 8 files changed, 98 insertions(+), 12 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 01dcbc2164b59..84ade8b76a199 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -141,6 +141,7 @@ enum rxrpc_timer_trace { rxrpc_timer_exp_ack, rxrpc_timer_exp_hard, rxrpc_timer_exp_idle, + rxrpc_timer_exp_lost_ack, rxrpc_timer_exp_normal, rxrpc_timer_exp_ping, rxrpc_timer_exp_resend, @@ -151,6 +152,7 @@ enum rxrpc_timer_trace { rxrpc_timer_set_for_ack, rxrpc_timer_set_for_hard, rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_lost_ack, rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, @@ -309,6 +311,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_exp_ack, "ExpAck") \ EM(rxrpc_timer_exp_hard, "ExpHrd") \ EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ EM(rxrpc_timer_exp_normal, "ExpNml") \ EM(rxrpc_timer_exp_ping, "ExpPng") \ EM(rxrpc_timer_exp_resend, "ExpRsn") \ @@ -318,6 +321,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_set_for_ack, "SetAck") \ EM(rxrpc_timer_set_for_hard, "SetHrd") \ EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ EM(rxrpc_timer_set_for_resend, "SetRTx") \ @@ -961,6 +965,7 @@ TRACE_EVENT(rxrpc_timer, __field(enum rxrpc_timer_trace, why ) __field(long, now ) __field(long, ack_at ) + __field(long, ack_lost_at ) __field(long, resend_at ) __field(long, ping_at ) __field(long, expect_rx_by ) @@ -974,6 +979,7 @@ TRACE_EVENT(rxrpc_timer, __entry->why = why; __entry->now = now; __entry->ack_at = call->ack_at; + __entry->ack_lost_at = call->ack_lost_at; __entry->resend_at = call->resend_at; __entry->expect_rx_by = call->expect_rx_by; __entry->expect_req_by = call->expect_req_by; @@ -981,10 +987,11 @@ TRACE_EVENT(rxrpc_timer, __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s a=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", + TP_printk("c=%p %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), __entry->ack_at - __entry->now, + __entry->ack_lost_at - __entry->now, __entry->resend_at - __entry->now, __entry->expect_rx_by - __entry->now, __entry->expect_req_by - __entry->now, @@ -1105,7 +1112,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%p r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 548411371048e..7e7b817c69f08 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -471,6 +471,7 @@ enum rxrpc_call_event { RXRPC_CALL_EV_RESEND, /* Tx resend required */ RXRPC_CALL_EV_PING, /* Ping send required */ RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ + RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ }; /* @@ -515,6 +516,7 @@ struct rxrpc_call { struct rxrpc_sock __rcu *socket; /* socket responsible */ struct mutex user_mutex; /* User access mutex */ unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ unsigned long ping_at; /* When next to send a ping */ unsigned long expect_rx_by; /* When we expect to get a packet by */ @@ -624,6 +626,8 @@ struct rxrpc_call { ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ + rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ + rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ }; /* @@ -1011,7 +1015,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *, bool); +int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index da91f16ac77c5..c65666b2f39e3 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -245,7 +245,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto out; } @@ -310,6 +310,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); + rxrpc_serial_t *send_ack; unsigned long now, next, t; rxrpc_see_call(call); @@ -358,6 +359,13 @@ void rxrpc_process_call(struct work_struct *work) set_bit(RXRPC_CALL_EV_ACK, &call->events); } + t = READ_ONCE(call->ack_lost_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); + cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); + } + t = READ_ONCE(call->ping_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); @@ -379,15 +387,24 @@ void rxrpc_process_call(struct work_struct *work) goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { + send_ack = NULL; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { + call->acks_lost_top = call->tx_top; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + send_ack = &call->acks_lost_ping; + } + + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + send_ack) { if (call->ackr_reason) { - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, send_ack); goto recheck_state; } } if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto recheck_state; } @@ -404,6 +421,7 @@ void rxrpc_process_call(struct work_struct *work) set(call->expect_req_by); set(call->expect_term_by); set(call->ack_at); + set(call->ack_lost_at); set(call->resend_at); set(call->ping_at); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index b305970a9b63a..7ee3d6ce5aa2b 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -197,6 +197,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) unsigned long j = now + MAX_JIFFY_OFFSET; call->ack_at = j; + call->ack_lost_at = j; call->resend_at = j; call->ping_at = j; call->expect_rx_by = j; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c89647eae86db..23a5e61d8f79a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -630,6 +630,43 @@ static void rxrpc_input_requested_ack(struct rxrpc_call *call, orig_serial, ack_serial, sent_at, resp_time); } +/* + * Process the response to a ping that we sent to find out if we lost an ACK. + * + * If we got back a ping response that indicates a lower tx_top than what we + * had at the time of the ping transmission, we adjudge all the DATA packets + * sent between the response tx_top and the ping-time tx_top to have been lost. + */ +static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) +{ + rxrpc_seq_t top, bottom, seq; + bool resend = false; + + spin_lock_bh(&call->lock); + + bottom = call->tx_hard_ack + 1; + top = call->acks_lost_top; + if (before(bottom, top)) { + for (seq = bottom; before_eq(seq, top); seq++) { + int ix = seq & RXRPC_RXTX_BUFF_MASK; + u8 annotation = call->rxtx_annotations[ix]; + u8 anno_type = annotation & RXRPC_TX_ANNO_MASK; + + if (anno_type != RXRPC_TX_ANNO_UNACK) + continue; + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = annotation; + resend = true; + } + } + + spin_unlock_bh(&call->lock); + + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); +} + /* * Process a ping response. */ @@ -645,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, smp_rmb(); ping_serial = call->ping_serial; + if (orig_serial == call->acks_lost_ping) + rxrpc_input_check_for_lost_ack(call); + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || before(orig_serial, ping_serial)) return; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f47659c7b224e..efe06edce189a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -95,7 +95,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, /* * Send an ACK call packet. */ -int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + rxrpc_serial_t *_serial) { struct rxrpc_connection *conn = NULL; struct rxrpc_ack_buffer *pkt; @@ -165,6 +166,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) ntohl(pkt->ack.firstPacket), ntohl(pkt->ack.serial), pkt->ack.reason, pkt->ack.nAcks); + if (_serial) + *_serial = serial; if (ping) { call->ping_serial = serial; @@ -323,7 +326,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * ACKs if a DATA packet appears to have been lost. */ if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && - (retrans || + (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || + retrans || call->cong_mode == RXRPC_CALL_SLOW_START || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), @@ -370,6 +374,18 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = now; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + if (call->peer->rtt_usage > 1) { + unsigned long nowj = jiffies, ack_lost_at; + + ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); + if (ack_lost_at < 1) + ack_lost_at = 1; + + ack_lost_at += nowj; + WRITE_ONCE(call->ack_lost_at, ack_lost_at); + rxrpc_reduce_call_timer(call, ack_lost_at, nowj, + rxrpc_timer_set_for_lost_ack); + } } } _leave(" = %d [%u]", ret, call->peer->maxdata); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index fad5f42a3abd3..cc21e8db25b0b 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -148,7 +148,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); } #endif @@ -222,7 +222,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, true, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index c56ee54fdd1f0..a1c53ac066a10 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -285,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); if (!skb) { size_t size, chunk, max, space; From 415f44e43282a16ec0808c7ccfd401762e587437 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: [PATCH 092/406] rxrpc: Add keepalive for a call We need to transmit a packet every so often to act as a keepalive for the peer (which has a timeout from the last time it received a packet) and also to prevent any intervening firewalls from closing the route. Do this by resetting a timer every time we transmit a packet. If the timer ever expires, we transmit a PING ACK packet and thereby also elicit a PING RESPONSE ACK from the other side - which prevents our last-rx timeout from expiring. The timer is set to 1/6 of the last-rx timeout so that we can detect the other side going away if it misses 6 replies in a row. This is particularly necessary for servers where the processing of the service function may take a significant amount of time. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 ++++++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 10 ++++++++++ net/rxrpc/output.c | 23 +++++++++++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 84ade8b76a199..e98fed6de4973 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -141,6 +141,7 @@ enum rxrpc_timer_trace { rxrpc_timer_exp_ack, rxrpc_timer_exp_hard, rxrpc_timer_exp_idle, + rxrpc_timer_exp_keepalive, rxrpc_timer_exp_lost_ack, rxrpc_timer_exp_normal, rxrpc_timer_exp_ping, @@ -152,6 +153,7 @@ enum rxrpc_timer_trace { rxrpc_timer_set_for_ack, rxrpc_timer_set_for_hard, rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_keepalive, rxrpc_timer_set_for_lost_ack, rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, @@ -162,6 +164,7 @@ enum rxrpc_timer_trace { enum rxrpc_propose_ack_trace { rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_keepalive, rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, @@ -311,6 +314,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_exp_ack, "ExpAck") \ EM(rxrpc_timer_exp_hard, "ExpHrd") \ EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_keepalive, "ExpKA ") \ EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ EM(rxrpc_timer_exp_normal, "ExpNml") \ EM(rxrpc_timer_exp_ping, "ExpPng") \ @@ -321,6 +325,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_set_for_ack, "SetAck") \ EM(rxrpc_timer_set_for_hard, "SetHrd") \ EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_keepalive, "KeepAl") \ EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ @@ -330,6 +335,7 @@ enum rxrpc_congest_change { #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ + EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7e7b817c69f08..cdcbc798f9217 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -519,6 +519,7 @@ struct rxrpc_call { unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ unsigned long ping_at; /* When next to send a ping */ + unsigned long keepalive_at; /* When next to send a keepalive ping */ unsigned long expect_rx_by; /* When we expect to get a packet by */ unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ unsigned long expect_term_by; /* When we expect call termination by */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c65666b2f39e3..bda952ffe6a6e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -366,6 +366,15 @@ void rxrpc_process_call(struct work_struct *work) set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); } + t = READ_ONCE(call->keepalive_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); + cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, + rxrpc_propose_ack_ping_for_keepalive); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + t = READ_ONCE(call->ping_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); @@ -423,6 +432,7 @@ void rxrpc_process_call(struct work_struct *work) set(call->ack_at); set(call->ack_lost_at); set(call->resend_at); + set(call->keepalive_at); set(call->ping_at); now = jiffies; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index efe06edce189a..42410e910affb 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -32,6 +32,24 @@ struct rxrpc_abort_buffer { __be32 abort_code; }; +/* + * Arrange for a keepalive ping a certain time after we last transmitted. This + * lets the far side know we're still interested in this call and helps keep + * the route through any intervening firewall open. + * + * Receiving a response to the ping will prevent the ->expect_rx_by timer from + * expiring. + */ +static void rxrpc_set_keepalive(struct rxrpc_call *call) +{ + unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; + + keepalive_at += now; + WRITE_ONCE(call->keepalive_at, keepalive_at); + rxrpc_reduce_call_timer(call, keepalive_at, now, + rxrpc_timer_set_for_keepalive); +} + /* * Fill out an ACK packet. */ @@ -205,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, call->ackr_seen = top; spin_unlock_bh(&call->lock); } + + rxrpc_set_keepalive(call); } out: @@ -388,6 +408,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, } } } + + rxrpc_set_keepalive(call); + _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; From f859ab61875978eeaa539740ff7f7d91f5d60006 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: [PATCH 093/406] rxrpc: Fix service endpoint expiry RxRPC service endpoints expire like they're supposed to by the following means: (1) Mark dead rxrpc_net structs (with ->live) rather than twiddling the global service conn timeout, otherwise the first rxrpc_net struct to die will cause connections on all others to expire immediately from then on. (2) Mark local service endpoints for which the socket has been closed (->service_closed) so that the expiration timeout can be much shortened for service and client connections going through that endpoint. (3) rxrpc_put_service_conn() needs to schedule the reaper when the usage count reaches 1, not 0, as idle conns have a 1 count. (4) The accumulator for the earliest time we might want to schedule for should be initialised to jiffies + MAX_JIFFY_OFFSET, not ULONG_MAX as the comparison functions use signed arithmetic. (5) Simplify the expiration handling, adding the expiration value to the idle timestamp each time rather than keeping track of the time in the past before which the idle timestamp must go to be expired. This is much easier to read. (6) Ignore the timeouts if the net namespace is dead. (7) Restart the service reaper work item rather the client reaper. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 13 +++++++++++ net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/conn_client.c | 2 ++ net/rxrpc/conn_object.c | 42 ++++++++++++++++++++---------------- net/rxrpc/net_ns.c | 3 +++ 6 files changed, 47 insertions(+), 18 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e98fed6de4973..36cb50c111a64 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -49,6 +49,7 @@ enum rxrpc_conn_trace { rxrpc_conn_put_client, rxrpc_conn_put_service, rxrpc_conn_queued, + rxrpc_conn_reap_service, rxrpc_conn_seen, }; @@ -221,6 +222,7 @@ enum rxrpc_congest_change { EM(rxrpc_conn_put_client, "PTc") \ EM(rxrpc_conn_put_service, "PTs") \ EM(rxrpc_conn_queued, "QUE") \ + EM(rxrpc_conn_reap_service, "RPs") \ E_(rxrpc_conn_seen, "SEE") #define rxrpc_client_traces \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c0cdcf980ffc3..abb524c2b8f8c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -867,6 +867,19 @@ static int rxrpc_release_sock(struct sock *sk) sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; + /* We want to kill off all connections from a service socket + * as fast as possible because we can't share these; client + * sockets, on the other hand, can share an endpoint. + */ + switch (sk->sk_state) { + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: + case RXRPC_SERVER_LISTENING: + case RXRPC_SERVER_LISTEN_DISABLED: + rx->local->service_closed = true; + break; + } + spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cdcbc798f9217..a0082c407005e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -84,6 +84,7 @@ struct rxrpc_net { unsigned int nr_client_conns; unsigned int nr_active_client_conns; bool kill_all_client_conns; + bool live; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ struct list_head waiting_client_conns; @@ -265,6 +266,7 @@ struct rxrpc_local { rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ bool dead; + bool service_closed; /* Service socket closed */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -881,6 +883,7 @@ void rxrpc_process_connection(struct work_struct *); * conn_object.c */ extern unsigned int rxrpc_connection_expiry; +extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 97f6a8de4845d..785dfdb9fef14 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -1079,6 +1079,8 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; + if (conn->params.local->service_closed) + expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index a01a3791f06ac..726eda6140aed 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -20,7 +20,8 @@ /* * Time till a connection expires after last use (in seconds). */ -unsigned int rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; static void rxrpc_destroy_connection(struct rcu_head *); @@ -321,7 +322,7 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 0) { + if (n == 1) { rxnet = conn->params.local->rxnet; rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); } @@ -363,15 +364,14 @@ void rxrpc_service_connection_reaper(struct work_struct *work) struct rxrpc_net *rxnet = container_of(to_delayed_work(work), struct rxrpc_net, service_conn_reaper); - unsigned long reap_older_than, earliest, idle_timestamp, now; + unsigned long expire_at, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); now = jiffies; - reap_older_than = now - rxrpc_connection_expiry * HZ; - earliest = ULONG_MAX; + earliest = now + MAX_JIFFY_OFFSET; write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { @@ -381,15 +381,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - idle_timestamp = READ_ONCE(conn->idle_timestamp); - _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), - (long)reap_older_than - (long)idle_timestamp); - - if (time_after(idle_timestamp, reap_older_than)) { - if (time_before(idle_timestamp, earliest)) - earliest = idle_timestamp; - continue; + if (rxnet->live) { + idle_timestamp = READ_ONCE(conn->idle_timestamp); + expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; + if (conn->params.local->service_closed) + expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; + + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)expire_at - (long)now); + + if (time_before(now, expire_at)) { + if (time_before(expire_at, earliest)) + earliest = expire_at; + continue; + } } /* The usage count sits at 1 whilst the object is unused on the @@ -397,6 +403,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) */ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) continue; + trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0); if (rxrpc_conn_is_client(conn)) BUG(); @@ -407,10 +414,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work) } write_unlock(&rxnet->conn_lock); - if (earliest != ULONG_MAX) { - _debug("reschedule reaper %ld", (long) earliest - now); + if (earliest != now + MAX_JIFFY_OFFSET) { + _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, + rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, earliest - now); } @@ -439,7 +446,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) rxrpc_destroy_all_client_connections(rxnet); - rxrpc_connection_expiry = 0; cancel_delayed_work(&rxnet->client_conn_reaper); rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); flush_workqueue(rxrpc_workqueue); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 7edceb8522f5b..684c51d600c7f 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -22,6 +22,7 @@ static __net_init int rxrpc_init_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); int ret; + rxnet->live = true; get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); rxnet->epoch |= RXRPC_RANDOM_EPOCH; @@ -60,6 +61,7 @@ static __net_init int rxrpc_init_net(struct net *net) return 0; err_proc: + rxnet->live = false; return ret; } @@ -70,6 +72,7 @@ static __net_exit void rxrpc_exit_net(struct net *net) { struct rxrpc_net *rxnet = rxrpc_net(net); + rxnet->live = false; rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_locals(rxnet); From 3d18cbb7fd0cfdf0b2ca18139950a4b0c1a0a220 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: [PATCH 094/406] rxrpc: Fix conn expiry timers Fix the rxrpc connection expiry timers so that connections for closed AF_RXRPC sockets get deleted in a more timely fashion, freeing up the transport UDP port much more quickly. (1) Replace the delayed work items with work items plus timers so that timer_reduce() can be used to shorten them and so that the timer doesn't requeue the work item if the net namespace is dead. (2) Don't use queue_delayed_work() as that won't alter the timeout if the timer is already running. (3) Don't rearm the timers if the network namespace is dead. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 2 ++ net/rxrpc/ar-internal.h | 6 ++++-- net/rxrpc/conn_client.c | 30 +++++++++++++++++++----------- net/rxrpc/conn_object.c | 28 +++++++++++++++++----------- net/rxrpc/net_ns.c | 30 ++++++++++++++++++++++++++---- 5 files changed, 68 insertions(+), 28 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index abb524c2b8f8c..8f7cf4c042be2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -895,6 +895,8 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); + rxrpc_queue_work(&rx->local->rxnet->service_conn_reaper); + rxrpc_queue_work(&rx->local->rxnet->client_conn_reaper); rxrpc_put_local(rx->local); rx->local = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a0082c407005e..416688381eb7d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -79,7 +79,8 @@ struct rxrpc_net { struct list_head conn_proc_list; /* List of conns in this namespace for proc */ struct list_head service_conns; /* Service conns in this namespace */ rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ - struct delayed_work service_conn_reaper; + struct work_struct service_conn_reaper; + struct timer_list service_conn_reap_timer; unsigned int nr_client_conns; unsigned int nr_active_client_conns; @@ -90,7 +91,8 @@ struct rxrpc_net { struct list_head waiting_client_conns; struct list_head active_client_conns; struct list_head idle_client_conns; - struct delayed_work client_conn_reaper; + struct work_struct client_conn_reaper; + struct timer_list client_conn_reap_timer; struct list_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 785dfdb9fef14..7f74ca3059f8f 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -691,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work); + rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); rxrpc_cull_active_client_conns(rxnet); ret = rxrpc_get_client_conn(call, cp, srx, gfp); @@ -756,6 +756,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) } } +/* + * Set the reap timer. + */ +static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) +{ + unsigned long now = jiffies; + unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; + + if (rxnet->live) + timer_reduce(&rxnet->client_conn_reap_timer, reap_at); +} + /* * Disconnect a client call. */ @@ -896,9 +908,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); if (rxnet->idle_client_conns.next == &conn->cache_link && !rxnet->kill_all_client_conns) - queue_delayed_work(rxrpc_workqueue, - &rxnet->client_conn_reaper, - rxrpc_conn_idle_client_expiry); + rxrpc_set_client_reap_timer(rxnet); } else { trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; @@ -1036,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) { struct rxrpc_connection *conn; struct rxrpc_net *rxnet = - container_of(to_delayed_work(work), - struct rxrpc_net, client_conn_reaper); + container_of(work, struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; bool did_discard = false; @@ -1116,9 +1125,8 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) */ _debug("not yet"); if (!rxnet->kill_all_client_conns) - queue_delayed_work(rxrpc_workqueue, - &rxnet->client_conn_reaper, - conn_expires_at - now); + timer_reduce(&rxnet->client_conn_reap_timer, + conn_expires_at); out: spin_unlock(&rxnet->client_conn_cache_lock); @@ -1138,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) rxnet->kill_all_client_conns = true; spin_unlock(&rxnet->client_conn_cache_lock); - cancel_delayed_work(&rxnet->client_conn_reaper); + del_timer_sync(&rxnet->client_conn_reap_timer); - if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0)) + if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) _debug("destroy: queue failed"); _leave(""); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 726eda6140aed..1aad04a32d5e2 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -310,22 +310,30 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) return conn; } +/* + * Set the service connection reap timer. + */ +static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, + unsigned long reap_at) +{ + if (rxnet->live) + timer_reduce(&rxnet->service_conn_reap_timer, reap_at); +} + /* * Release a service connection */ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet; const void *here = __builtin_return_address(0); int n; n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 1) { - rxnet = conn->params.local->rxnet; - rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); - } + if (n == 1) + rxrpc_set_service_reap_timer(conn->params.local->rxnet, + jiffies + rxrpc_connection_expiry); } /* @@ -362,8 +370,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; struct rxrpc_net *rxnet = - container_of(to_delayed_work(work), - struct rxrpc_net, service_conn_reaper); + container_of(work, struct rxrpc_net, service_conn_reaper); unsigned long expire_at, earliest, idle_timestamp, now; LIST_HEAD(graveyard); @@ -417,8 +424,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (earliest != now + MAX_JIFFY_OFFSET) { _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, - earliest - now); + rxrpc_set_service_reap_timer(rxnet, earliest); } while (!list_empty(&graveyard)) { @@ -446,8 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) rxrpc_destroy_all_client_connections(rxnet); - cancel_delayed_work(&rxnet->client_conn_reaper); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); + del_timer_sync(&rxnet->service_conn_reap_timer); + rxrpc_queue_work(&rxnet->service_conn_reaper); flush_workqueue(rxrpc_workqueue); write_lock(&rxnet->conn_lock); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 684c51d600c7f..f18c9248e0d4c 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -14,6 +14,24 @@ unsigned int rxrpc_net_id; +static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, client_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->client_conn_reaper); +} + +static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, service_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->service_conn_reaper); +} + /* * Initialise a per-network namespace record. */ @@ -32,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net) INIT_LIST_HEAD(&rxnet->conn_proc_list); INIT_LIST_HEAD(&rxnet->service_conns); rwlock_init(&rxnet->conn_lock); - INIT_DELAYED_WORK(&rxnet->service_conn_reaper, - rxrpc_service_connection_reaper); + INIT_WORK(&rxnet->service_conn_reaper, + rxrpc_service_connection_reaper); + timer_setup(&rxnet->service_conn_reap_timer, + rxrpc_service_conn_reap_timeout, 0); rxnet->nr_client_conns = 0; rxnet->nr_active_client_conns = 0; @@ -43,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net) INIT_LIST_HEAD(&rxnet->waiting_client_conns); INIT_LIST_HEAD(&rxnet->active_client_conns); INIT_LIST_HEAD(&rxnet->idle_client_conns); - INIT_DELAYED_WORK(&rxnet->client_conn_reaper, - rxrpc_discard_expired_client_conns); + INIT_WORK(&rxnet->client_conn_reaper, + rxrpc_discard_expired_client_conns); + timer_setup(&rxnet->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); INIT_LIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); From febae9bc946de2c85e324b20bb7dc7b2d4e49d37 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Mon, 27 Mar 2017 16:20:57 +0100 Subject: [PATCH 095/406] drm: hdlcd: Update PM code to save/restore console. Update the PM code to suspend/resume the fbdev_cma console. Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 764d0c83710ca..45b174fea36b2 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -432,9 +433,11 @@ static int __maybe_unused hdlcd_pm_suspend(struct device *dev) return 0; drm_kms_helper_poll_disable(drm); + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 1); hdlcd->state = drm_atomic_helper_suspend(drm); if (IS_ERR(hdlcd->state)) { + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 0); drm_kms_helper_poll_enable(drm); return PTR_ERR(hdlcd->state); } @@ -451,8 +454,8 @@ static int __maybe_unused hdlcd_pm_resume(struct device *dev) return 0; drm_atomic_helper_resume(drm, hdlcd->state); + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 0); drm_kms_helper_poll_enable(drm); - pm_runtime_set_active(dev); return 0; } From 860ec7c6e21cecca8c508a66ee211b56d87f5bd7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 23 Nov 2017 19:26:20 +0100 Subject: [PATCH 096/406] s390/debug: use pK for kernel pointers the s390 debug feature (/sys/kernel/debug/s390dbf/) shows the kernel pointer of the calling function even for kptr_restrict == 2. Let us use pK instead of p. This hides the kernel addresses for kptr_restrict == 2: root@host $ echo 2 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 0000000000000000 snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c root@host $ echo 1 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 000000000071171c snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c root@host $ echo 0 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 000000000071171c snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 58b9e127b6151..80e974adb9e8b 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1392,7 +1392,7 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %p ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", area, sec, usec, level, except_str, entry->id.fields.cpuid, (void *)caller); return rc; From 6a55d2cdf1bc140665cbfaed14de79acaf3758c4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:00 +0100 Subject: [PATCH 097/406] s390: block: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/block/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Stefan Haberland Cc: Jan Hoeppner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 1 + drivers/s390/block/dasd_devmap.c | 1 + drivers/s390/block/dasd_diag.c | 1 + drivers/s390/block/dasd_eckd.c | 1 + drivers/s390/block/dasd_fba.c | 1 + drivers/s390/block/dcssblk.c | 1 + drivers/s390/block/scm_blk.c | 1 + drivers/s390/block/xpram.c | 1 + 8 files changed, 8 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 29f35e29d4801..fdb3b133e5e04 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index c95a4784c1911..e7cd28ff19844 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 98fb28e49d2c0..f035c2f25d35a 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Based on.......: linux/drivers/s390/block/mdisk.c diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8eafcd5fa0049..1a41ef4963387 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 6168ccdb389c3..a6b132f7e869e 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Bugreports.to..: diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 7abb240847c07..6aaefb7804369 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * dcssblk.c -- the S/390 block driver for dcss memory * diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index eb51893c74a4b..b4130c7880d87 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Block driver for s390 storage class memory. * diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 571a0709e1e5b..2a6334ca750ef 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Xpram.c -- the S/390 expanded memory RAM-disk * From 812141a9fe61446c948a71456c58090ac11a6d14 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:01 +0100 Subject: [PATCH 098/406] s390: crypto: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/crypto/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Harald Freudenberger Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 1 + drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/pkey_api.c | 1 + drivers/s390/crypto/zcrypt_api.c | 1 + drivers/s390/crypto/zcrypt_api.h | 1 + drivers/s390/crypto/zcrypt_card.c | 1 + drivers/s390/crypto/zcrypt_cca_key.h | 1 + drivers/s390/crypto/zcrypt_cex2a.c | 1 + drivers/s390/crypto/zcrypt_cex2a.h | 1 + drivers/s390/crypto/zcrypt_cex4.c | 1 + drivers/s390/crypto/zcrypt_error.h | 1 + drivers/s390/crypto/zcrypt_msgtype50.c | 1 + drivers/s390/crypto/zcrypt_msgtype50.h | 1 + drivers/s390/crypto/zcrypt_msgtype6.c | 1 + drivers/s390/crypto/zcrypt_msgtype6.h | 1 + drivers/s390/crypto/zcrypt_pcixcc.c | 1 + drivers/s390/crypto/zcrypt_pcixcc.h | 1 + drivers/s390/crypto/zcrypt_queue.c | 1 + 18 files changed, 18 insertions(+) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index faeba9db3d959..658626039171b 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright IBM Corp. 2006, 2012 * Author(s): Cornelia Huck diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 7e45c4d08cad4..b43e4d650940a 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright IBM Corp. 2006, 2012 * Author(s): Cornelia Huck diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 8dda5bb34a2f2..869ace34972d9 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * pkey device driver * diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index a9a56aa9c26b7..620407ac2faa8 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 73541a798db7a..4883d51521fd1 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index f85dacf1c2844..c0e7072f18a9c 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index 12cff6262566b..ebb9fbcb9bc07 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index b97c5d5ee5a4a..e00ffa44f85c0 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 0dce4b9af1841..6f6830e0bab33 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index e2eebc775a37a..f305538334adb 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * Author(s): Holger Dengler diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 13df60209ed33..8d8892d14bd0d 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index db5bde47dfb0d..8fdf5d65c4d29 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index 5cc280318ee70..f6ce799c22393 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 785620d305043..3a19b69b846b5 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 7a0d5b57821f0..52d1408893779 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 600604782b65e..9963018a6bbc3 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index eacafc8962f20..a85778d5763c7 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 4742be0eec24f..5103727b4633b 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * From 724117b77bbe2b28f27728d58a432ed22630e33f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:02 +0100 Subject: [PATCH 099/406] s390: cio: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/cio/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Sebastian Ott Cc: Peter Oberparleiter Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Cornelia Huck Cc: Dong Jia Shi Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ccwgroup.c | 1 + drivers/s390/cio/chp.c | 1 + drivers/s390/cio/chsc.c | 1 + drivers/s390/cio/chsc_sch.c | 1 + drivers/s390/cio/cio.c | 1 + drivers/s390/cio/cmf.c | 1 + drivers/s390/cio/css.c | 1 + drivers/s390/cio/device.c | 1 + drivers/s390/cio/device_fsm.c | 1 + drivers/s390/cio/device_ops.c | 1 + drivers/s390/cio/eadm_sch.c | 1 + drivers/s390/cio/isc.c | 1 + drivers/s390/cio/qdio_main.c | 1 + drivers/s390/cio/qdio_setup.c | 1 + drivers/s390/cio/scm.c | 1 + drivers/s390/cio/vfio_ccw_drv.c | 1 + 16 files changed, 16 insertions(+) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index e2f7b6e93efdd..bfec1485ca233 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bus driver for ccwgroup * diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index f4166f80c4d4e..5c94a3aec4dd2 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 1999, 2010 * Author(s): Cornelia Huck (cornelia.huck@de.ibm.com) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 7b0b295b2313b..c08fc5a8df0c6 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S/390 common I/O routines -- channel subsystem call * diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 8e7e19b9e92c0..0015729d917d9 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for s390 chsc subchannels * diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 89216174fcbba..987bf9a8c9f72 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S/390 common I/O routines -- low level i/o calls * diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 7d59230e88bb3..320ce009ba41a 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Linux on zSeries Channel Measurement Facility support * diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index d3e504c3c3626..f4c2e71e7a562 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * driver for channel subsystem * diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 318d8269f5dee..e4ed6c651bcac 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * bus driver for ccw devices * diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index dd7d79d30edc4..1319122e9d123 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * finite state machine for device handling * diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index cf8c4ac6323a6..aa125d92cc650 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Copyright IBM Corp. 2002, 2009 * diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index ce16e4f45d440..53468ae64b999 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for s390 eadm subchannels * diff --git a/drivers/s390/cio/isc.c b/drivers/s390/cio/isc.c index c592087be0f1a..77fde9f5ea8ba 100644 --- a/drivers/s390/cio/isc.c +++ b/drivers/s390/cio/isc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Functions for registration of I/O interruption subclasses on s390. * diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index ed4852fab44b5..59b4a3370cd5d 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux for s390 qdio support, buffer handling, qdio API and module support. * diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 9ae1380cbc313..98f3cfdc0d027 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * qdio queue initialization * diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index 1fa53ecdc2aaa..6bca1d5455d4f 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Recognize and maintain s390 storage class memory. * diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 82f05c4b8c526..ea6a2d0b2894d 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * VFIO based Physical Subchannel device driver * From 6f05e69e44d8167d3c97c3b3b657a94f3cb41d71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:03 +0100 Subject: [PATCH 100/406] s390: char: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/char/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/fs3270.c | 1 + drivers/s390/char/hmcdrv_mod.c | 1 + drivers/s390/char/monreader.c | 1 + drivers/s390/char/monwriter.c | 1 + drivers/s390/char/raw3270.c | 1 + drivers/s390/char/sclp_async.c | 1 + drivers/s390/char/tape_34xx.c | 1 + drivers/s390/char/tape_3590.c | 1 + drivers/s390/char/tape_class.c | 1 + drivers/s390/char/tape_core.c | 1 + drivers/s390/char/tty3270.c | 1 + drivers/s390/char/vmlogrdr.c | 1 + drivers/s390/char/vmur.c | 1 + drivers/s390/char/zcore.c | 1 + 14 files changed, 14 insertions(+) diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index c4518168fd02c..61822480a2a0b 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - fullscreen driver. * diff --git a/drivers/s390/char/hmcdrv_mod.c b/drivers/s390/char/hmcdrv_mod.c index 251a318a9b754..1447d08872253 100644 --- a/drivers/s390/char/hmcdrv_mod.c +++ b/drivers/s390/char/hmcdrv_mod.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * HMC Drive DVD Module * diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index 027ac6ae5eea5..bf4ab4efed735 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Character device driver for reading z/VM *MONITOR service records. * diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 571a7e3527553..76c158c415103 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Character device driver for writing z/VM *MONITOR service records. * diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 5d4f053d7c38c..f8cd2935fbfd4 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - core functions. * diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c index 19c25427f27fd..ee6f3b5637283 100644 --- a/drivers/s390/char/sclp_async.c +++ b/drivers/s390/char/sclp_async.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Enable Asynchronous Notification via SCLP. * diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index de69f0ddc321d..6d73ee3f827a6 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * tape device discipline for 3480/3490 tapes. * diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index e352047ed9f7a..37e65a05517f5 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * tape device discipline for 3590 tapes. * diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c index e7d23048d3f00..a07102472ce97 100644 --- a/drivers/s390/char/tape_class.c +++ b/drivers/s390/char/tape_class.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2004 * diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 32503a60ee851..8d3370da2dfc2 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * basic function of the tape device driver * diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index e417ccd9e2998..1c98023cffd41 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - tty functions. * diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 62559dc0169f8..069b9ef08206b 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * character device driver for reading z/VM system service records * diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index fa90ef05afc00..52aa894243187 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux driver for System z and s390 unit record devices * (z/VM virtual punch, reader, printer) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index aaed778f67c4a..dd86559ce7fbc 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * zcore module to export memory content and register sets for creating system * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same From ab9953ff0f2e37092dc247ddd7c62fe6f03618dc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:04 +0100 Subject: [PATCH 101/406] s390: net: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/net/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Julian Wiedmann Cc: Ursula Braun Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/net/ctcm_main.c | 1 + drivers/s390/net/fsm.c | 1 + drivers/s390/net/lcs.c | 1 + drivers/s390/net/netiucv.c | 1 + drivers/s390/net/qeth_core_main.c | 1 + drivers/s390/net/qeth_core_sys.c | 1 + drivers/s390/net/qeth_l2_main.c | 1 + drivers/s390/net/qeth_l3_main.c | 1 + drivers/s390/net/smsgiucv.c | 1 + drivers/s390/net/smsgiucv_app.c | 1 + 10 files changed, 10 insertions(+) diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index be9f172185310..7ce98b70cad38 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2001, 2009 * Author(s): diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c index 8c14c6c3ad3d6..f0c7c182b077f 100644 --- a/drivers/s390/net/fsm.c +++ b/drivers/s390/net/fsm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /** * A generic FSM based on fsm used in isdn4linux * diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index e131a03262ad7..36899e94d268e 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Linux for S/390 Lan Channel Station Network Driver * diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index b9c7c1e61da29..150053131baf5 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * IUCV network driver * diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 49b9efeba1bda..98a7f84540ab2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index b22ed2a57acd9..ae81534de9122 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d2537c09126d6..93d7e345d1804 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index aadd384316a37..0f8c12738b067 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index a851d34c642b5..93405e0bad3c9 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * IUCV special message driver * diff --git a/drivers/s390/net/smsgiucv_app.c b/drivers/s390/net/smsgiucv_app.c index 32515a201bbc6..0a263999f7ae4 100644 --- a/drivers/s390/net/smsgiucv_app.c +++ b/drivers/s390/net/smsgiucv_app.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Deliver z/VM CP special messages (SMSG) as uevents. * From 40bf411ee6d1f2d9833486a9d4318734180a997e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:05 +0100 Subject: [PATCH 102/406] s390: scsi: zfcp_aux: add SPDX identifier It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/scsi/zfcp_aux.c file with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Steffen Maier Cc: Benjamin Block Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/scsi/zfcp_aux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 84752152d41fd..a3a8c8d9d7171 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * zfcp device driver * From 31fb16730a0d8baf0c04c93ebf606340c9ea8393 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:06 +0100 Subject: [PATCH 103/406] s390: virtio: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/virtio/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Cornelia Huck Cc: Halil Pasic Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/virtio/Makefile | 1 + drivers/s390/virtio/virtio_ccw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index f68af1f317f15..7c6f0c8db8834 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # Makefile for kvm guest drivers on s390 # # Copyright IBM Corp. 2008 diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index b18fe2014cf21..56346caadb21a 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ccw based virtio transport * From 0b622e60bc6c4eca75d517b10f15914ecd58e6b1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:07 +0100 Subject: [PATCH 104/406] s390: crypto: Remove redundant license text Now that the SPDX tag is in all drivers/s390/crypto/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Harald Freudenberger Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 14 -------------- drivers/s390/crypto/ap_bus.h | 14 -------------- drivers/s390/crypto/pkey_api.c | 5 ----- drivers/s390/crypto/zcrypt_api.c | 14 -------------- drivers/s390/crypto/zcrypt_api.h | 14 -------------- drivers/s390/crypto/zcrypt_card.c | 10 ---------- drivers/s390/crypto/zcrypt_cca_key.h | 14 -------------- drivers/s390/crypto/zcrypt_cex2a.c | 14 -------------- drivers/s390/crypto/zcrypt_cex2a.h | 14 -------------- drivers/s390/crypto/zcrypt_error.h | 14 -------------- drivers/s390/crypto/zcrypt_msgtype50.c | 14 -------------- drivers/s390/crypto/zcrypt_msgtype50.h | 14 -------------- drivers/s390/crypto/zcrypt_msgtype6.c | 14 -------------- drivers/s390/crypto/zcrypt_msgtype6.h | 14 -------------- drivers/s390/crypto/zcrypt_pcixcc.c | 14 -------------- drivers/s390/crypto/zcrypt_pcixcc.h | 14 -------------- drivers/s390/crypto/zcrypt_queue.c | 10 ---------- 17 files changed, 221 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 658626039171b..48d55dc9e9864 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -8,20 +8,6 @@ * Holger Dengler * * Adjunct processor bus. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "ap" diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index b43e4d650940a..e0827eaa42f1d 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -8,20 +8,6 @@ * Holger Dengler * * Adjunct processor bus header file. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _AP_BUS_H_ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 869ace34972d9..e7c2e4f9529ac 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -4,11 +4,6 @@ * * Copyright IBM Corp. 2017 * Author(s): Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #define KMSG_COMPONENT "pkey" diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 620407ac2faa8..ce15f101ee282 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -11,20 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 4883d51521fd1..9fff8912f6e3b 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -11,20 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_API_H_ diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index c0e7072f18a9c..233e1e695208b 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -11,16 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index ebb9fbcb9bc07..011d61d8a4ae5 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_CCA_KEY_H_ diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index e00ffa44f85c0..e701194d36115 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 6f6830e0bab33..c3c116777c937 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_CEX2A_H_ diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 8d8892d14bd0d..01598d83c60a0 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_ERROR_H_ diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 8fdf5d65c4d29..afe1b2bcd7ecf 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "zcrypt" diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index f6ce799c22393..0a36545cfb8ee 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_MSGTYPE50_H_ diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 3a19b69b846b5..f54bef4a928e9 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "zcrypt" diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 52d1408893779..d314f4525518b 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_MSGTYPE6_H_ diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 9963018a6bbc3..159b0a0dd211b 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index a85778d5763c7..d678a3af83a7b 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_PCIXCC_H_ diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 5103727b4633b..720434e18007e 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -11,16 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include From 13d1d559f04a893b4a32ec04fb9d7210ec4d9597 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:08 +0100 Subject: [PATCH 105/406] s390: drivers: Remove redundant license text Now that the SPDX tag is in all drivers/s390/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Sebastian Ott Cc: Peter Oberparleiter Cc: Julian Wiedmann Cc: Ursula Braun Cc: Cornelia Huck Cc: Halil Pasic Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/zcore.c | 1 - drivers/s390/cio/cmf.c | 14 -------------- drivers/s390/cio/css.c | 2 -- drivers/s390/cio/device.c | 2 -- drivers/s390/cio/device_ops.c | 2 -- drivers/s390/net/lcs.c | 14 -------------- drivers/s390/net/netiucv.c | 15 --------------- drivers/s390/net/smsgiucv.c | 14 -------------- drivers/s390/virtio/Makefile | 4 ---- drivers/s390/virtio/virtio_ccw.c | 4 ---- 10 files changed, 72 deletions(-) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index dd86559ce7fbc..4369662cfff5a 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -8,7 +8,6 @@ * * Copyright IBM Corp. 2003, 2008 * Author(s): Michael Holzheu - * License: GPL */ #define KMSG_COMPONENT "zdump" diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 320ce009ba41a..5e495c62cfa77 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -8,20 +8,6 @@ * Cornelia Huck * * original idea from Natarajan Krishnaswami - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index f4c2e71e7a562..0f11dce6e2240 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -6,8 +6,6 @@ * * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) - * - * License: GPL */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e4ed6c651bcac..75a245f38e2eb 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -6,8 +6,6 @@ * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * License: GPL */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index aa125d92cc650..1caf6a398760b 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -4,8 +4,6 @@ * * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) - * - * License: GPL */ #include #include diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 36899e94d268e..92ae84a927fcf 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -8,20 +8,6 @@ * Rewritten by * Frank Pavlic and * Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "lcs" diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 150053131baf5..5ce2424ca7290 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -19,21 +19,6 @@ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) * Alan Altmark (Alan_Altmark@us.ibm.com) Sept. 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #define KMSG_COMPONENT "netiucv" diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index 93405e0bad3c9..3b0c8b8a7634d 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -5,20 +5,6 @@ * Copyright IBM Corp. 2003, 2009 * * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index 7c6f0c8db8834..2dc4d9aab6345 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -2,9 +2,5 @@ # Makefile for kvm guest drivers on s390 # # Copyright IBM Corp. 2008 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License (version 2 only) -# as published by the Free Software Foundation. obj-$(CONFIG_S390_GUEST) += virtio_ccw.o diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 56346caadb21a..ba2e0856d22cd 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ From b1c0de0e51504b84cf4a3eb46541e98557bb460d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Nov 2017 15:19:32 +0100 Subject: [PATCH 106/406] s390: sthyi: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/kernel/sthyi file with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sthyi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 12981e197f012..db163cf3606a2 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * store hypervisor information instruction emulation functions. * From a17ae4c3a6add7579e9962df5dd12cb1f3bed431 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:32 +0100 Subject: [PATCH 107/406] s390: kernel: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/kernel/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/kprobes.c | 1 + arch/s390/kernel/lgr.c | 1 + arch/s390/kernel/module.c | 1 + arch/s390/kernel/nmi.c | 1 + arch/s390/kernel/perf_cpum_cf.c | 1 + arch/s390/kernel/perf_cpum_sf.c | 1 + arch/s390/kernel/perf_event.c | 1 + arch/s390/kernel/setup.c | 1 + arch/s390/kernel/stacktrace.c | 1 + arch/s390/kernel/time.c | 1 + arch/s390/kernel/topology.c | 1 + arch/s390/kernel/vdso.c | 1 + arch/s390/kernel/vdso32/clock_getres.S | 1 + arch/s390/kernel/vdso32/clock_gettime.S | 1 + arch/s390/kernel/vdso32/gettimeofday.S | 1 + arch/s390/kernel/vdso64/clock_getres.S | 1 + arch/s390/kernel/vdso64/clock_gettime.S | 1 + arch/s390/kernel/vdso64/gettimeofday.S | 1 + arch/s390/kernel/vtime.c | 1 + 22 files changed, 22 insertions(+) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 0a0e14335a045..b2c68fbf26346 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Disassemble s390 instructions. * diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2aa545dca4d53..5b23c4f6e50cd 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack dumping functions * diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 310e59e6eb4b2..8ecb8726ac476 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ipl/reipl/dump support for Linux on s390. * diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1a6521af17514..ee055f7bdb104 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel Probes (KProbes) * diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index bf9622f0e6b16..452502f9a0d98 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux Guest Relocation (LGR) detection * diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 7b87991416fd6..10d104589978a 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel module help for s390. * diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 6ff169253caee..c7a627620e5eb 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Machine check handler * diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 746d034233336..8f6296860186e 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x - CPU-measurement Counter Facility * diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 227b38bd82c94..6e404caac856f 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for the System z CPU-measurement Sampling Facility * diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 93a386f4a3b5a..0ef3088513745 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x * diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 090053cf279bb..793da97f9a6e5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 version * Copyright IBM Corp. 1999, 2012 diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index e66687dc61446..460dcfba7d4ec 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack trace management functions * diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5cbd52169348f..44c012c866d2d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Time of day based timer functions. * diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index f9b393d4a0783..4d5b65e527b54 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Heiko Carstens diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 39a218703c50a..88f6fab68eb26 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * vdso setup for s390 * diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index eca3f001f0813..eb9f22cbfd476 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index a5769b83d90e6..b642c0382238a 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 63b86dceb0bfe..8a89b54246d7b 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index c8513deb8c663..416bf711c2aaa 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 5d7b56b49458d..d2dee331b7cd7 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index b02e62f3bc12d..cf62a16110542 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index dd7178fbb4f3b..f24395a019182 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Virtual cpu timer based timer functions. * From 20a884f5e0180a6f67bd650bdb9d703c415c1436 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:34 +0100 Subject: [PATCH 108/406] s390: crypto: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/crypto/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Herbert Xu Cc: "David S. Miller" Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 1 + arch/s390/crypto/arch_random.c | 1 + arch/s390/crypto/crc32-vx.c | 1 + arch/s390/crypto/des_s390.c | 1 + arch/s390/crypto/ghash_s390.c | 1 + arch/s390/crypto/paes_s390.c | 1 + arch/s390/crypto/prng.c | 1 + arch/s390/crypto/sha.h | 1 + arch/s390/crypto/sha256_s390.c | 1 + arch/s390/crypto/sha512_s390.c | 1 + arch/s390/crypto/sha_common.c | 1 + 11 files changed, 11 insertions(+) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b48e20dd94e96..5a7b60a65f967 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 36aefc07d10cd..7ad00354fb25c 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 arch random implementation. * diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 992e630c227b5..436865926c26e 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Crypto-API module for CRC-32 algorithms implemented with the * z/Architecture Vector Extension Facility. diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 0d296662bbf0a..c871b1086408b 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 564616d48d8bd..3b7f96c9eead8 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index a4e903ed7e21c..53926a2b72850 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 3e47c4a0f18b3..a97a1802cfb4d 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2006, 2015 * Author(s): Jan Glauber diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 10f2007900790..19d344d493925 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 53c277999a286..4a31d6416d414 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 2f4caa1ef123d..e22ed139d0b63 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index c740f77285b2a..d359ec7fdd6bf 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * From ac41aaeedc62648208eb1b32cff768a9ffcfdd23 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:35 +0100 Subject: [PATCH 109/406] s390: mm: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/mm/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 1 + arch/s390/mm/gmap.c | 1 + arch/s390/mm/mmap.c | 1 + arch/s390/mm/pgtable.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 2dbdcd85b68f2..3596ef91cf683 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Collaborative memory management interface. * diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index b2c140193b0af..05d459b638f55 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * KVM guest address space mapping code * diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5bea139517a2e..5ec9a8c2b768d 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * flexible mmap layout support * diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae677f814bc07..4f2b65d01a704 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Martin Schwidefsky From adbb3901685fb94a4a0ac62f7859724964054287 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:36 +0100 Subject: [PATCH 110/406] s390: pci: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/pci/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Sebastian Ott Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 1 + arch/s390/pci/pci_debug.c | 1 + arch/s390/pci/pci_dma.c | 1 + arch/s390/pci/pci_insn.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 0fe649c0d5423..4902fed221c0e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index c2f786f0ea068..b482e95b6249e 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012,2015 * diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 0d300ee00f4e9..f7aa5a77827ec 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 81b840bc6e4e7..19bcb3b45a70f 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 specific pci instructions * From 0caa8cdf1a02a8b45dbd8641c7b3e896051c1254 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:37 +0100 Subject: [PATCH 111/406] s390: appldata: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/appldata/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 1 + arch/s390/appldata/appldata_mem.c | 1 + arch/s390/appldata/appldata_net_sum.c | 1 + arch/s390/appldata/appldata_os.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ef3fb1b9201f0..cb6e8066b1ad6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. * Exports appldata_register_ops() and appldata_unregister_ops() for the diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 598df57085017..e68136c3c23aa 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects data related to memory management. diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 66037d2622b40..8bc14b0d1def0 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects accumulated network statistics (Packets received/transmitted, diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 45b3178200abc..433a994b1a89e 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects misc. OS related data (CPU utilization, running processes). From 0b73214f8a4d7790efd68b9cc183d26e472ccefd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:38 +0100 Subject: [PATCH 112/406] s390: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the remaining arch/s390/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 1 + arch/s390/boot/install.sh | 1 + arch/s390/hypfs/inode.c | 1 + arch/s390/include/asm/cpu_mf.h | 1 + arch/s390/include/asm/kprobes.h | 1 + arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/asm/kvm_para.h | 1 + arch/s390/include/asm/livepatch.h | 1 + arch/s390/include/asm/syscall.h | 1 + arch/s390/include/asm/sysinfo.h | 1 + 10 files changed, 10 insertions(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 6b3f41985f28e..ffd95861f9ac1 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # s390/Makefile # diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index aed3069699bd5..32a9367b9bf07 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 # # arch/s390x/boot/install.sh # diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index cf8a2d92467f3..05d62cdb78b52 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Hypervisor filesystem for Linux on s390. * diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 792cda339af1a..a9ba5e322bf38 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * CPU-measurement facilities * diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 921391f2341eb..396b065ebb2cc 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _ASM_S390_KPROBES_H #define _ASM_S390_KPROBES_H /* diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f3a9b5a445b64..66efb2979351a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for kernel virtual machines on s390 * diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 41393052ac57e..2c623bf5c095a 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for paravirtual devices on s390 * diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 6de5c6cb0061a..d1f97fafcc208 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * livepatch.h - s390-specific Kernel Live Patching Core * diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6bc941be69217..4d6463eb7a2d5 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Access to user system call parameters and results * diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index a702cb9d42692..c2137e7d93109 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for store system information stsi * From 53634237e72b4f3c2dff2f92ec35792207730a98 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:40 +0100 Subject: [PATCH 113/406] s390: kernel: Remove redundant license text Now that the SPDX tag is in all arch/s390/kernel/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 14 -------------- arch/s390/kernel/module.c | 14 -------------- arch/s390/kernel/perf_cpum_cf.c | 4 ---- arch/s390/kernel/perf_cpum_sf.c | 4 ---- arch/s390/kernel/perf_event.c | 4 ---- arch/s390/kernel/sthyi.c | 4 ---- arch/s390/kernel/vdso.c | 4 ---- arch/s390/kernel/vdso32/clock_getres.S | 4 ---- arch/s390/kernel/vdso32/clock_gettime.S | 4 ---- arch/s390/kernel/vdso32/gettimeofday.S | 4 ---- arch/s390/kernel/vdso64/clock_getres.S | 4 ---- arch/s390/kernel/vdso64/clock_gettime.S | 4 ---- arch/s390/kernel/vdso64/gettimeofday.S | 4 ---- 13 files changed, 72 deletions(-) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index ee055f7bdb104..af3722c28fd96 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -2,20 +2,6 @@ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * s390 port, used ppc64 as template. Mike Grundy diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 10d104589978a..b7abfad4fd7df 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -9,20 +9,6 @@ * * based on i386 version * Copyright (C) 2001 Rusty Russell. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 8f6296860186e..cc085e2d2ce99 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2017 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "cpum_cf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 6e404caac856f..1c9ddd7aa5ec8 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2013 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "cpum_sf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0ef3088513745..0d770e513abf4 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "perf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index db163cf3606a2..80b862e9c53c6 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -2,10 +2,6 @@ /* * store hypervisor information instruction emulation functions. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Copyright IBM Corp. 2016 * Author(s): Janosch Frank */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 88f6fab68eb26..f3a1c7c6824ef 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index eb9f22cbfd476..f61df5253c23c 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index b642c0382238a..2d6ec3abe095e 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 8a89b54246d7b..aa8bf13a2edb1 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 416bf711c2aaa..faf5213b15dfa 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index d2dee331b7cd7..6046b3bfca462 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index cf62a16110542..cc9dbc27da6fb 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include From 94bf2f28c9923abe7a1fccc126a86a2401cb5a47 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:41 +0100 Subject: [PATCH 114/406] s390: include: Remove redundant license text Now that the SPDX tag is in all arch/s390/include/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Halil Pasic Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 4 ---- arch/s390/include/asm/kprobes.h | 14 -------------- arch/s390/include/asm/kvm_host.h | 4 ---- arch/s390/include/asm/kvm_para.h | 6 ------ arch/s390/include/asm/livepatch.h | 7 ------- arch/s390/include/asm/syscall.h | 4 ---- arch/s390/include/asm/sysinfo.h | 4 ---- arch/s390/include/uapi/asm/kvm.h | 4 ---- arch/s390/include/uapi/asm/kvm_para.h | 4 ---- arch/s390/include/uapi/asm/kvm_perf.h | 4 ---- arch/s390/include/uapi/asm/virtio-ccw.h | 4 ---- arch/s390/include/uapi/asm/zcrypt.h | 14 -------------- 12 files changed, 73 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index a9ba5e322bf38..dd08db491b89e 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -5,10 +5,6 @@ * Copyright IBM Corp. 2012 * Author(s): Hendrik Brueckner * Jan Glauber - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef _ASM_S390_CPU_MF_H #define _ASM_S390_CPU_MF_H diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 396b065ebb2cc..13de80cf741c0 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -4,20 +4,6 @@ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * 2002-Oct Created by Vamsi Krishna S Kernel diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 66efb2979351a..e14f381757f67 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte */ diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 2c623bf5c095a..74eeec9c0a809 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger */ /* @@ -21,8 +17,6 @@ * * Copyright IBM Corp. 2007,2008 * Author(s): Christian Borntraeger - * - * This work is licensed under the terms of the GNU GPL, version 2. */ #ifndef __S390_KVM_PARA_H #define __S390_KVM_PARA_H diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index d1f97fafcc208..672f95b12d406 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -8,13 +8,6 @@ * Jiri Slaby */ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - #ifndef ASM_LIVEPATCH_H #define ASM_LIVEPATCH_H diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 4d6463eb7a2d5..96f9a9151fde0 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef _ASM_SYSCALL_H diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index c2137e7d93109..25057c118d563 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2001, 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Ulrich Weigand * Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 9ad172dcd912d..38535a57fef83 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -6,10 +6,6 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h index 0dc86b3a7cb0d..b9ab584adf43d 100644 --- a/arch/s390/include/uapi/asm/kvm_para.h +++ b/arch/s390/include/uapi/asm/kvm_para.h @@ -4,9 +4,5 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h index c36c97ffdc6fa..84606b8cc49e4 100644 --- a/arch/s390/include/uapi/asm/kvm_perf.h +++ b/arch/s390/include/uapi/asm/kvm_perf.h @@ -4,10 +4,6 @@ * * Copyright 2014 IBM Corp. * Author(s): Alexander Yarygin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef __LINUX_KVM_PERF_S390_H diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h index 967aad3901051..3a77833c74dc2 100644 --- a/arch/s390/include/uapi/asm/virtio-ccw.h +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ #ifndef __KVM_VIRTIO_CCW_H diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 137ef473584ee..d568307321fcc 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -9,20 +9,6 @@ * Eric Rossman (edrossma@us.ibm.com) * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __ASM_S390_ZCRYPT_H From a876ca4ddef01e8737da5c672e878c67798cb975 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:42 +0100 Subject: [PATCH 115/406] s390: crypto: Remove redundant license text Now that the SPDX tag is in all arch/s390/crypto/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Herbert Xu Cc: "David S. Miller" Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 6 ------ arch/s390/crypto/arch_random.c | 5 ----- arch/s390/crypto/des_s390.c | 6 ------ arch/s390/crypto/paes_s390.c | 5 ----- arch/s390/crypto/sha.h | 6 ------ arch/s390/crypto/sha256_s390.c | 6 ------ arch/s390/crypto/sha512_s390.c | 6 ------ arch/s390/crypto/sha_common.c | 6 ------ 8 files changed, 46 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 5a7b60a65f967..d60798737d866 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -12,12 +12,6 @@ * Harald Freudenberger * * Derived from "crypto/aes_generic.c" - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #define KMSG_COMPONENT "aes_s390" diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 7ad00354fb25c..8720e9203ecfb 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -4,11 +4,6 @@ * * Copyright IBM Corp. 2017 * Author(s): Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #include diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index c871b1086408b..5346b5a80bb6c 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -7,12 +7,6 @@ * Copyright IBM Corp. 2003, 2011 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * */ #include diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 53926a2b72850..003932db8d12d 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -8,11 +8,6 @@ * Copyright IBM Corp. 2017 * Author(s): Martin Schwidefsky * Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #define KMSG_COMPONENT "paes_s390" diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 19d344d493925..d6f8258b44df3 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 4a31d6416d414..944aa6b237cd8 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -7,12 +7,6 @@ * s390 Version: * Copyright IBM Corp. 2005, 2011 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include #include diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index e22ed139d0b63..b17eded532b12 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include #include diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index d359ec7fdd6bf..cf0718d121bcb 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include From fec37202e8a5c060e0c46cd353bf0b6d26b9cb02 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:43 +0100 Subject: [PATCH 116/406] s390: Remove redundant license text Now that the SPDX tag is in all arch/s390/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording in the remaining files can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 4 ---- arch/s390/boot/install.sh | 4 ---- arch/s390/hypfs/inode.c | 1 - arch/s390/mm/mmap.c | 15 --------------- 4 files changed, 24 deletions(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ffd95861f9ac1..de54cfc6109d8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -7,10 +7,6 @@ # for "archclean" and "archdep" for cleaning up and making dependencies for # this architecture # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1994 by Linus Torvalds # diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index 32a9367b9bf07..bed227f267ae5 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -3,10 +3,6 @@ # # arch/s390x/boot/install.sh # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1995 by Linus Torvalds # # Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 05d62cdb78b52..43bbe63e2992c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 2006, 2008 * Author(s): Michael Holzheu - * License: GPL */ #define KMSG_COMPONENT "hypfs" diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5ec9a8c2b768d..831bdcf407bbc 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -5,21 +5,6 @@ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * * Started by Ingo Molnar */ From 345f8f34bb473241d62803951c18a844dd705f8d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Nov 2017 16:23:15 +0100 Subject: [PATCH 117/406] s390: revert ELF_ET_DYN_BASE base changes This reverts commit a73dc5370e153ac63718d850bddf0c9aa9d871e6. Reducing the base address for 31-bit PIE executables from (STACK_TOP/3)*2 to 4MB broke several compat programs which use -fpie to move the executable out of the lower 16MB. Cc: # 4.13+ Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 9a3cb3983c014..1a61b1b997f2a 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -194,13 +194,14 @@ struct arch_elf_state { #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE -/* - * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ -#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ - 0x100000000UL) +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. 64-bit + tasks are aligned to 4GB. */ +#define ELF_ET_DYN_BASE (is_compat_task() ? \ + (STACK_TOP / 3 * 2) : \ + (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ From c2cc215cde5b3fd4102c9595df66c6e4ff237be4 Mon Sep 17 00:00:00 2001 From: Cihangir Akturk Date: Fri, 11 Aug 2017 15:32:48 +0300 Subject: [PATCH 118/406] drm: mali-dp: switch to drm_*_get(), drm_*_put() helpers Use drm_*_get() and drm_*_put() helpers instead of drm_*_reference() and drm_*_unreference() helpers. drm_*_reference() and drm_*_unreference() functions are just compatibility alias for drm_*_get() and drm_*_put() and should not be used by new code. So convert all users of compatibility functions to use the new APIs. Generated by: scripts/coccinelle/api/drm-get-put.cocci Signed-off-by: Cihangir Akturk Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_planes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 94e7e3fa3408c..f12f8eb126be7 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -57,7 +57,7 @@ static void malidp_de_plane_destroy(struct drm_plane *plane) struct malidp_plane *mp = to_malidp_plane(plane); if (mp->base.fb) - drm_framebuffer_unreference(mp->base.fb); + drm_framebuffer_put(mp->base.fb); drm_plane_helper_disable(plane); drm_plane_cleanup(plane); From 0970d7a2f5b0dcdd520c7655210d677f6e9a878e Mon Sep 17 00:00:00 2001 From: Srishti Sharma Date: Fri, 29 Sep 2017 15:30:40 +0530 Subject: [PATCH 119/406] drm/arm: Replace instances of drm_dev_unref with drm_dev_put. Replace drm_dev_unref with drm_dev_put as it is more consistent with kernel coding style. Done using the following semantic patch by coccinelle. @r@ expression e; @@ -drm_dev_unref(); +drm_dev_put(); Signed-off-by: Srishti Sharma [split patch into hdlcd and mali-dp versions] Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index b8944666a18f0..a2b698c983b45 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -671,7 +671,7 @@ static int malidp_bind(struct device *dev) malidp_runtime_pm_suspend(dev); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); alloc_fail: of_reserved_mem_device_release(dev); @@ -704,7 +704,7 @@ static void malidp_unbind(struct device *dev) malidp_runtime_pm_suspend(dev); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); of_reserved_mem_device_release(dev); } From a6993b215a719ad5758c1bced5f8df95add070bf Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 31 Aug 2017 15:48:43 +0100 Subject: [PATCH 120/406] drm: mali-dp: Separate static internal data into a read-only structure. The malidp_hw_device structure that the driver uses to handle the differences between versions of the IP contains both non-changeable data and fields that get updated at probe time. Previously we were copying the read-only part into allocated memory, but that can be completely avoided by splitting the structure into a read-only part and keeping the runtime modifiable fields into the old structure. Reviewed-by: Brian Starkey Reviewed-by: Gustavo Padovan Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 13 +++--- drivers/gpu/drm/arm/malidp_drv.c | 30 ++++++------- drivers/gpu/drm/arm/malidp_hw.c | 46 ++++++++++---------- drivers/gpu/drm/arm/malidp_hw.h | 65 +++++++++++++++++------------ drivers/gpu/drm/arm/malidp_planes.c | 19 ++++----- 5 files changed, 93 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 3615d18a7ddf3..153a496706262 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -65,8 +65,8 @@ static void malidp_crtc_atomic_enable(struct drm_crtc *crtc, /* We rely on firmware to set mclk to a sensible level. */ clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000); - hwdev->modeset(hwdev, &vm); - hwdev->leave_config_mode(hwdev); + hwdev->hw->modeset(hwdev, &vm); + hwdev->hw->leave_config_mode(hwdev); drm_crtc_vblank_on(crtc); } @@ -78,7 +78,8 @@ static void malidp_crtc_atomic_disable(struct drm_crtc *crtc, int err; drm_crtc_vblank_off(crtc); - hwdev->enter_config_mode(hwdev); + hwdev->hw->enter_config_mode(hwdev); + clk_disable_unprepare(hwdev->pxlclk); err = pm_runtime_put(crtc->dev->dev); @@ -319,7 +320,7 @@ static int malidp_crtc_atomic_check_scaling(struct drm_crtc *crtc, mclk_calc: drm_display_mode_to_videomode(&state->adjusted_mode, &vm); - ret = hwdev->se_calc_mclk(hwdev, s, &vm); + ret = hwdev->hw->se_calc_mclk(hwdev, s, &vm); if (ret < 0) return -EINVAL; return 0; @@ -475,7 +476,7 @@ static int malidp_crtc_enable_vblank(struct drm_crtc *crtc) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_enable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.vsync_irq); + hwdev->hw->map.de_irq_map.vsync_irq); return 0; } @@ -485,7 +486,7 @@ static void malidp_crtc_disable_vblank(struct drm_crtc *crtc) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.vsync_irq); + hwdev->hw->map.de_irq_map.vsync_irq); } static const struct drm_crtc_funcs malidp_crtc_funcs = { diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index a2b698c983b45..91f2b0191368c 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -47,10 +47,10 @@ static void malidp_write_gamma_table(struct malidp_hw_device *hwdev, * directly. */ malidp_hw_write(hwdev, gamma_write_mask, - hwdev->map.coeffs_base + MALIDP_COEF_TABLE_ADDR); + hwdev->hw->map.coeffs_base + MALIDP_COEF_TABLE_ADDR); for (i = 0; i < MALIDP_COEFFTAB_NUM_COEFFS; ++i) malidp_hw_write(hwdev, data[i], - hwdev->map.coeffs_base + + hwdev->hw->map.coeffs_base + MALIDP_COEF_TABLE_DATA); } @@ -103,7 +103,7 @@ void malidp_atomic_commit_update_coloradj(struct drm_crtc *crtc, for (i = 0; i < MALIDP_COLORADJ_NUM_COEFFS; ++i) malidp_hw_write(hwdev, mc->coloradj_coeffs[i], - hwdev->map.coeffs_base + + hwdev->hw->map.coeffs_base + MALIDP_COLOR_ADJ_COEF + 4 * i); malidp_hw_setbits(hwdev, MALIDP_DISP_FUNC_CADJ, @@ -120,8 +120,8 @@ static void malidp_atomic_commit_se_config(struct drm_crtc *crtc, struct malidp_hw_device *hwdev = malidp->dev; struct malidp_se_config *s = &cs->scaler_config; struct malidp_se_config *old_s = &old_cs->scaler_config; - u32 se_control = hwdev->map.se_base + - ((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? + u32 se_control = hwdev->hw->map.se_base + + ((hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? 0x10 : 0xC); u32 layer_control = se_control + MALIDP_SE_LAYER_CONTROL; u32 scr = se_control + MALIDP_SE_SCALING_CONTROL; @@ -135,7 +135,7 @@ static void malidp_atomic_commit_se_config(struct drm_crtc *crtc, return; } - hwdev->se_set_scaling_coeffs(hwdev, s, old_s); + hwdev->hw->se_set_scaling_coeffs(hwdev, s, old_s); val = malidp_hw_read(hwdev, se_control); val |= MALIDP_SE_SCALING_EN | MALIDP_SE_ALPHA_EN; @@ -170,9 +170,9 @@ static int malidp_set_and_wait_config_valid(struct drm_device *drm) int ret; atomic_set(&malidp->config_valid, 0); - hwdev->set_config_valid(hwdev); + hwdev->hw->set_config_valid(hwdev); /* don't wait for config_valid flag if we are in config mode */ - if (hwdev->in_config_mode(hwdev)) + if (hwdev->hw->in_config_mode(hwdev)) return 0; ret = wait_event_interruptible_timeout(malidp->wq, @@ -455,7 +455,7 @@ static int malidp_runtime_pm_suspend(struct device *dev) struct malidp_hw_device *hwdev = malidp->dev; /* we can only suspend if the hardware is in config mode */ - WARN_ON(!hwdev->in_config_mode(hwdev)); + WARN_ON(!hwdev->hw->in_config_mode(hwdev)); hwdev->pm_suspended = true; clk_disable_unprepare(hwdev->mclk); @@ -500,11 +500,7 @@ static int malidp_bind(struct device *dev) if (!hwdev) return -ENOMEM; - /* - * copy the associated data from malidp_drm_of_match to avoid - * having to keep a reference to the OF node after binding - */ - memcpy(hwdev, of_device_get_match_data(dev), sizeof(*hwdev)); + hwdev->hw = (struct malidp_hw *)of_device_get_match_data(dev); malidp->dev = hwdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -568,13 +564,13 @@ static int malidp_bind(struct device *dev) goto query_hw_fail; } - ret = hwdev->query_hw(hwdev); + ret = hwdev->hw->query_hw(hwdev); if (ret) { DRM_ERROR("Invalid HW configuration\n"); goto query_hw_fail; } - version = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_DE_CORE_ID); + version = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_DE_CORE_ID); DRM_INFO("found ARM Mali-DP%3x version r%dp%d\n", version >> 16, (version >> 12) & 0xf, (version >> 8) & 0xf); @@ -589,7 +585,7 @@ static int malidp_bind(struct device *dev) for (i = 0; i < MAX_OUTPUT_CHANNELS; i++) out_depth = (out_depth << 8) | (output_width[i] & 0xf); - malidp_hw_write(hwdev, out_depth, hwdev->map.out_depth_base); + malidp_hw_write(hwdev, out_depth, hwdev->hw->map.out_depth_base); atomic_set(&malidp->config_valid, 0); init_waitqueue_head(&malidp->wq); diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 17bca99e8ac82..2bfb542135ac5 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -183,7 +183,7 @@ static void malidp500_enter_config_mode(struct malidp_hw_device *hwdev) malidp_hw_setbits(hwdev, MALIDP500_DC_CONFIG_REQ, MALIDP500_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == MALIDP500_DC_CONFIG_REQ) break; /* @@ -203,7 +203,7 @@ static void malidp500_leave_config_mode(struct malidp_hw_device *hwdev) malidp_hw_clearbits(hwdev, MALIDP_CFG_VALID, MALIDP500_CONFIG_VALID); malidp_hw_clearbits(hwdev, MALIDP500_DC_CONFIG_REQ, MALIDP500_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == 0) break; usleep_range(100, 1000); @@ -216,7 +216,7 @@ static bool malidp500_in_config_mode(struct malidp_hw_device *hwdev) { u32 status; - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == MALIDP500_DC_CONFIG_REQ) return true; @@ -407,7 +407,7 @@ static void malidp550_enter_config_mode(struct malidp_hw_device *hwdev) malidp_hw_setbits(hwdev, MALIDP550_DC_CONFIG_REQ, MALIDP550_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == MALIDP550_DC_CONFIG_REQ) break; /* @@ -427,7 +427,7 @@ static void malidp550_leave_config_mode(struct malidp_hw_device *hwdev) malidp_hw_clearbits(hwdev, MALIDP_CFG_VALID, MALIDP550_CONFIG_VALID); malidp_hw_clearbits(hwdev, MALIDP550_DC_CONFIG_REQ, MALIDP550_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == 0) break; usleep_range(100, 1000); @@ -440,7 +440,7 @@ static bool malidp550_in_config_mode(struct malidp_hw_device *hwdev) { u32 status; - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == MALIDP550_DC_CONFIG_REQ) return true; @@ -616,7 +616,7 @@ static int malidp650_query_hw(struct malidp_hw_device *hwdev) return 0; } -const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = { +const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = { [MALIDP_500] = { .map = { .coeffs_base = MALIDP500_COEFFS_BASE, @@ -751,7 +751,7 @@ static void malidp_hw_clear_irq(struct malidp_hw_device *hwdev, u8 block, u32 ir { u32 base = malidp_get_block_base(hwdev, block); - if (hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) + if (hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) malidp_hw_write(hwdev, irq, base + MALIDP_REG_CLEARIRQ); else malidp_hw_write(hwdev, irq, base + MALIDP_REG_STATUS); @@ -762,12 +762,14 @@ static irqreturn_t malidp_de_irq(int irq, void *arg) struct drm_device *drm = arg; struct malidp_drm *malidp = drm->dev_private; struct malidp_hw_device *hwdev; + struct malidp_hw *hw; const struct malidp_irq_map *de; u32 status, mask, dc_status; irqreturn_t ret = IRQ_NONE; hwdev = malidp->dev; - de = &hwdev->map.de_irq_map; + hw = hwdev->hw; + de = &hw->map.de_irq_map; /* * if we are suspended it is likely that we were invoked because @@ -778,8 +780,8 @@ static irqreturn_t malidp_de_irq(int irq, void *arg) return IRQ_NONE; /* first handle the config valid IRQ */ - dc_status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); - if (dc_status & hwdev->map.dc_irq_map.vsync_irq) { + dc_status = malidp_hw_read(hwdev, hw->map.dc_base + MALIDP_REG_STATUS); + if (dc_status & hw->map.dc_irq_map.vsync_irq) { /* we have a page flip event */ atomic_set(&malidp->config_valid, 1); malidp_hw_clear_irq(hwdev, MALIDP_DC_BLOCK, dc_status); @@ -832,11 +834,11 @@ int malidp_de_irq_init(struct drm_device *drm, int irq) /* first enable the DC block IRQs */ malidp_hw_enable_irq(hwdev, MALIDP_DC_BLOCK, - hwdev->map.dc_irq_map.irq_mask); + hwdev->hw->map.dc_irq_map.irq_mask); /* now enable the DE block IRQs */ malidp_hw_enable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.irq_mask); + hwdev->hw->map.de_irq_map.irq_mask); return 0; } @@ -847,9 +849,9 @@ void malidp_de_irq_fini(struct drm_device *drm) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.irq_mask); + hwdev->hw->map.de_irq_map.irq_mask); malidp_hw_disable_irq(hwdev, MALIDP_DC_BLOCK, - hwdev->map.dc_irq_map.irq_mask); + hwdev->hw->map.dc_irq_map.irq_mask); } static irqreturn_t malidp_se_irq(int irq, void *arg) @@ -857,6 +859,8 @@ static irqreturn_t malidp_se_irq(int irq, void *arg) struct drm_device *drm = arg; struct malidp_drm *malidp = drm->dev_private; struct malidp_hw_device *hwdev = malidp->dev; + struct malidp_hw *hw = hwdev->hw; + const struct malidp_irq_map *se = &hw->map.se_irq_map; u32 status, mask; /* @@ -867,12 +871,12 @@ static irqreturn_t malidp_se_irq(int irq, void *arg) if (hwdev->pm_suspended) return IRQ_NONE; - status = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_STATUS); - if (!(status & hwdev->map.se_irq_map.irq_mask)) + status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); + if (!(status & se->irq_mask)) return IRQ_NONE; - mask = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_MASKIRQ); - status = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_STATUS); + mask = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_MASKIRQ); + status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); status &= mask; /* ToDo: status decoding and firing up of VSYNC and page flip events */ @@ -905,7 +909,7 @@ int malidp_se_irq_init(struct drm_device *drm, int irq) } malidp_hw_enable_irq(hwdev, MALIDP_SE_BLOCK, - hwdev->map.se_irq_map.irq_mask); + hwdev->hw->map.se_irq_map.irq_mask); return 0; } @@ -916,5 +920,5 @@ void malidp_se_irq_fini(struct drm_device *drm) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_SE_BLOCK, - hwdev->map.se_irq_map.irq_mask); + hwdev->hw->map.se_irq_map.irq_mask); } diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h index 849ad9a30c3af..b0690ebb35652 100644 --- a/drivers/gpu/drm/arm/malidp_hw.h +++ b/drivers/gpu/drm/arm/malidp_hw.h @@ -120,18 +120,14 @@ struct malidp_hw_regmap { /* Unlike DP550/650, DP500 has 3 stride registers in its video layer. */ #define MALIDP_DEVICE_LV_HAS_3_STRIDES BIT(0) -struct malidp_hw_device { - const struct malidp_hw_regmap map; - void __iomem *regs; +struct malidp_hw_device; - /* APB clock */ - struct clk *pclk; - /* AXI clock */ - struct clk *aclk; - /* main clock for display core */ - struct clk *mclk; - /* pixel clock for display core */ - struct clk *pxlclk; +/* + * Static structure containing hardware specific data and pointers to + * functions that behave differently between various versions of the IP. + */ +struct malidp_hw { + const struct malidp_hw_regmap map; /* * Validate the driver instance against the hardware bits @@ -182,15 +178,6 @@ struct malidp_hw_device { struct videomode *vm); u8 features; - - u8 min_line_size; - u16 max_line_size; - - /* track the device PM state */ - bool pm_suspended; - - /* size of memory used for rotating layers, up to two banks available */ - u32 rotation_memory[2]; }; /* Supported variants of the hardware */ @@ -202,7 +189,33 @@ enum { MALIDP_MAX_DEVICES }; -extern const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES]; +extern const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES]; + +/* + * Structure used by the driver during runtime operation. + */ +struct malidp_hw_device { + struct malidp_hw *hw; + void __iomem *regs; + + /* APB clock */ + struct clk *pclk; + /* AXI clock */ + struct clk *aclk; + /* main clock for display core */ + struct clk *mclk; + /* pixel clock for display core */ + struct clk *pxlclk; + + u8 min_line_size; + u16 max_line_size; + + /* track the device PM state */ + bool pm_suspended; + + /* size of memory used for rotating layers, up to two banks available */ + u32 rotation_memory[2]; +}; static inline u32 malidp_hw_read(struct malidp_hw_device *hwdev, u32 reg) { @@ -240,9 +253,9 @@ static inline u32 malidp_get_block_base(struct malidp_hw_device *hwdev, { switch (block) { case MALIDP_SE_BLOCK: - return hwdev->map.se_base; + return hwdev->hw->map.se_base; case MALIDP_DC_BLOCK: - return hwdev->map.dc_base; + return hwdev->hw->map.dc_base; } return 0; @@ -275,7 +288,7 @@ u8 malidp_hw_get_format_id(const struct malidp_hw_regmap *map, static inline bool malidp_hw_pitch_valid(struct malidp_hw_device *hwdev, unsigned int pitch) { - return !(pitch & (hwdev->map.bus_align_bytes - 1)); + return !(pitch & (hwdev->hw->map.bus_align_bytes - 1)); } /* U16.16 */ @@ -308,8 +321,8 @@ static inline void malidp_se_set_enh_coeffs(struct malidp_hw_device *hwdev) }; u32 val = MALIDP_SE_SET_ENH_LIMIT_LOW(MALIDP_SE_ENH_LOW_LEVEL) | MALIDP_SE_SET_ENH_LIMIT_HIGH(MALIDP_SE_ENH_HIGH_LEVEL); - u32 image_enh = hwdev->map.se_base + - ((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? + u32 image_enh = hwdev->hw->map.se_base + + ((hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? 0x10 : 0xC) + MALIDP_SE_IMAGE_ENH; u32 enh_coeffs = image_enh + MALIDP_SE_ENH_COEFF0; int i; diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index f12f8eb126be7..e7419797bbd16 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -185,8 +185,9 @@ static int malidp_de_plane_check(struct drm_plane *plane, fb = state->fb; - ms->format = malidp_hw_get_format_id(&mp->hwdev->map, mp->layer->id, - fb->format->format); + ms->format = malidp_hw_get_format_id(&mp->hwdev->hw->map, + mp->layer->id, + fb->format->format); if (ms->format == MALIDP_INVALID_FORMAT_ID) return -EINVAL; @@ -211,7 +212,7 @@ static int malidp_de_plane_check(struct drm_plane *plane, * third plane stride register. */ if (ms->n_planes == 3 && - !(mp->hwdev->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) && + !(mp->hwdev->hw->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) && (state->fb->pitches[1] != state->fb->pitches[2])) return -EINVAL; @@ -229,9 +230,9 @@ static int malidp_de_plane_check(struct drm_plane *plane, if (state->rotation & MALIDP_ROTATED_MASK) { int val; - val = mp->hwdev->rotmem_required(mp->hwdev, state->crtc_h, - state->crtc_w, - fb->format->format); + val = mp->hwdev->hw->rotmem_required(mp->hwdev, state->crtc_h, + state->crtc_w, + fb->format->format); if (val < 0) return val; @@ -251,7 +252,7 @@ static void malidp_de_set_plane_pitches(struct malidp_plane *mp, return; if (num_planes == 3) - num_strides = (mp->hwdev->features & + num_strides = (mp->hwdev->hw->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) ? 3 : 2; for (i = 0; i < num_strides; ++i) @@ -264,13 +265,11 @@ static void malidp_de_plane_update(struct drm_plane *plane, struct drm_plane_state *old_state) { struct malidp_plane *mp; - const struct malidp_hw_regmap *map; struct malidp_plane_state *ms = to_malidp_plane_state(plane->state); u32 src_w, src_h, dest_w, dest_h, val; int i; mp = to_malidp_plane(plane); - map = &mp->hwdev->map; /* convert src values from Q16 fixed point to integer */ src_w = plane->state->src_w >> 16; @@ -363,7 +362,7 @@ static const struct drm_plane_helper_funcs malidp_de_plane_helper_funcs = { int malidp_de_planes_init(struct drm_device *drm) { struct malidp_drm *malidp = drm->dev_private; - const struct malidp_hw_regmap *map = &malidp->dev->map; + const struct malidp_hw_regmap *map = &malidp->dev->hw->map; struct malidp_plane *plane = NULL; enum drm_plane_type plane_type; unsigned long crtcs = 1 << drm->mode_config.num_crtc; From 54243016ae35a0912a680f884835237fd6176820 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 31 Aug 2017 17:39:24 +0100 Subject: [PATCH 121/406] drm: mali-dp: Disable planes when their CRTC gets disabled. Make sure only the planes on the active CRTCs get committed and that all planes on the disabled CRTCs get turned off. Reviewed-by: Brian Starkey Reviewed-by: Gustavo Padovan Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 153a496706262..904fff80917ba 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -77,6 +77,9 @@ static void malidp_crtc_atomic_disable(struct drm_crtc *crtc, struct malidp_hw_device *hwdev = malidp->dev; int err; + /* always disable planes on the CRTC that is being turned off */ + drm_atomic_helper_disable_planes_on_crtc(old_state, false); + drm_crtc_vblank_off(crtc); hwdev->hw->enter_config_mode(hwdev); From 26c0a26d78bc7c2943d55121a32cb85a4594f8ea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Nov 2017 10:12:33 -0700 Subject: [PATCH 122/406] nvme-fc: don't use bit masks for set/test_bit() numbers So far harmless, but it's confusing and a bug waiting to happen if the shifts grow larger than 4. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e0577bf33f45e..0a8af4daef890 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -31,8 +31,8 @@ enum nvme_fc_queue_flags { - NVME_FC_Q_CONNECTED = (1 << 0), - NVME_FC_Q_LIVE = (1 << 1), + NVME_FC_Q_CONNECTED = 0, + NVME_FC_Q_LIVE, }; #define NVMEFC_QUEUE_DELAY 3 /* ms units */ From ca43a0c73d1d32e2c24698b1b9044557a0b197d4 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 19 Nov 2017 22:21:08 -0500 Subject: [PATCH 123/406] forcedeth: replace pci_unmap_page with dma_unmap_page The function pci_unmap_page is obsolete. So it is replaced with the function dma_unmap_page. CC: Srinivas Eeda CC: Joe Jin CC: Junxiao Bi Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index ac8439ceea10a..481876b5424c9 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1986,9 +1986,9 @@ static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb) tx_skb->dma_len, DMA_TO_DEVICE); else - pci_unmap_page(np->pci_dev, tx_skb->dma, + dma_unmap_page(&np->pci_dev->dev, tx_skb->dma, tx_skb->dma_len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); tx_skb->dma = 0; } } From 01a95b2141e337dea15ad48e60a35c72a9b10205 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2017 09:35:25 +0100 Subject: [PATCH 124/406] cfg80211: select CRYPTO_SHA256 if needed When regulatory database certificates are built-in, they're currently using the SHA256 digest algorithm, so add that to the build in that case. Also add a note that for custom certificates, one may need to add the right algorithms. Reported-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Johannes Berg --- net/wireless/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index da91bb547db3e..1abcc4fc4df18 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -20,6 +20,10 @@ config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL select FW_LOADER + # may need to update this when certificates are changed and are + # using a different algorithm, though right now they shouldn't + # (this is here rather than below to allow it to be a module) + select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS ---help--- cfg80211 is the Linux wireless LAN (802.11) configuration API. Enable this if you have a wireless device. @@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR certificates like in the kernel sources (net/wireless/certs/) that shall be accepted for a signed regulatory database. + Note that you need to also select the correct CRYPTO_ modules + for your certificates, and if cfg80211 is built-in they also must be. + config CFG80211_REG_CELLULAR_HINTS bool "cfg80211 regulatory support for cellular base station hints" depends on CFG80211_CERTIFICATION_ONUS From bb22cafd75686d799dabfe422571fac4b5c2ed94 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Fri, 24 Nov 2017 15:14:24 -0800 Subject: [PATCH 125/406] bcache: add a comment in journal bucket reading Journal bucket is a circular buffer, the bucket can be like YYYNNNYY, which means the first valid journal in the 7th bucket, and the latest valid journal in third bucket, in this case, if we do not try we the zero index first, We may get a valid journal in the 7th bucket, then we call find_next_bit(bitmap,ca->sb.njournal_buckets, l + 1) to get the first invalid bucket after the 7th bucket, because all these buckets is valid, so no bit 1 in bitmap, thus find_next_bit() function would return with ca->sb.njournal_buckets (8). So, after that, bcache only read journal in 7th and 8the bucket, the first to the third buckets are lost. So, it is important to let developer know that, we need to try the zero index at first in the hash-search, and avoid any breaks in future's code modification. [ML: Fixed whitespace & formatting & file permissions] Signed-off-by: Tang Junhui Signed-off-by: Michael Lyle Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/journal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 02a98ddb592d3..5018c56ebb676 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -170,6 +170,11 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) * find a sequence of buckets with valid journal entries */ for (i = 0; i < ca->sb.njournal_buckets; i++) { + /* + * We must try the index l with ZERO first for + * correctness due to the scenario that the journal + * bucket is circular buffer which might have wrapped + */ l = (i * 2654435769U) % ca->sb.njournal_buckets; if (test_bit(l, bitmap)) From cf33c1ee5254c6a430bc1538232b49c3ea13e613 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 24 Nov 2017 15:14:25 -0800 Subject: [PATCH 126/406] bcache: Fix building error on MIPS This patch try to fix the building error on MIPS. The reason is MIPS has already defined the PTR macro, which conflicts with the PTR macro in include/uapi/linux/bcache.h. [fixed by mlyle: corrected a line-length issue] Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Reviewed-by: Michael Lyle Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 2 +- drivers/md/bcache/extents.c | 2 +- drivers/md/bcache/journal.c | 2 +- include/uapi/linux/bcache.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index a27d85232ce13..a0cc1bc6d8844 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -490,7 +490,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, if (b == -1) goto err; - k->ptr[i] = PTR(ca->buckets[b].gen, + k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, bucket_to_sector(c, b), ca->sb.nr_this_dev); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 41c238fc37338..f9d391711595f 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -585,7 +585,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey return false; for (i = 0; i < KEY_PTRS(l); i++) - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) return false; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 5018c56ebb676..a87165c1d8e52 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -512,7 +512,7 @@ static void journal_reclaim(struct cache_set *c) continue; ja->cur_idx = next; - k->ptr[n++] = PTR(0, + k->ptr[n++] = MAKE_PTR(0, bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ca->sb.nr_this_dev); } diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 90fc490f973f9..821f71a2e48fa 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -91,7 +91,7 @@ PTR_FIELD(PTR_GEN, 0, 8) #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) -#define PTR(gen, offset, dev) \ +#define MAKE_PTR(gen, offset, dev) \ ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) /* Bkey utility code */ From e393aa2446150536929140739f09c6ecbcbea7f0 Mon Sep 17 00:00:00 2001 From: Rui Hua Date: Fri, 24 Nov 2017 15:14:26 -0800 Subject: [PATCH 127/406] bcache: recover data from backing when data is clean When we send a read request and hit the clean data in cache device, there is a situation called cache read race in bcache(see the commit in the tail of cache_look_up(), the following explaination just copy from there): The bucket we're reading from might be reused while our bio is in flight, and we could then end up reading the wrong data. We guard against this by checking (in bch_cache_read_endio()) if the pointer is stale again; if so, we treat it as an error (s->iop.error = -EINTR) and reread from the backing device (but we don't pass that error up anywhere) It should be noted that cache read race happened under normal circumstances, not the circumstance when SSD failed, it was counted and shown in /sys/fs/bcache/XXX/internal/cache_read_races. Without this patch, when we use writeback mode, we will never reread from the backing device when cache read race happened, until the whole cache device is clean, because the condition (s->recoverable && (dc && !atomic_read(&dc->has_dirty))) is false in cached_dev_read_error(). In this situation, the s->iop.error(= -EINTR) will be passed up, at last, user will receive -EINTR when it's bio end, this is not suitable, and wield to up-application. In this patch, we use s->read_dirty_data to judge whether the read request hit dirty data in cache device, it is safe to reread data from the backing device when the read request hit clean data. This can not only handle cache read race, but also recover data when failed read request from cache device. [edited by mlyle to fix up whitespace, commit log title, comment spelling] Fixes: d59b23795933 ("bcache: only permit to recovery read error when cache device is clean") Cc: # 4.14 Signed-off-by: Hua Rui Reviewed-by: Michael Lyle Reviewed-by: Coly Li Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3a7aed7282b2a..643c3021624fa 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -708,16 +708,15 @@ static void cached_dev_read_error(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); /* - * If cache device is dirty (dc->has_dirty is non-zero), then - * recovery a failed read request from cached device may get a - * stale data back. So read failure recovery is only permitted - * when cache device is clean. + * If read request hit dirty data (s->read_dirty_data is true), + * then recovery a failed read request from cached device may + * get a stale data back. So read failure recovery is only + * permitted when read request hit clean data in cache device, + * or when cache read race happened. */ - if (s->recoverable && - (dc && !atomic_read(&dc->has_dirty))) { + if (s->recoverable && !s->read_dirty_data) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); From 6c4ca1e36cdc1a0a7a84797804b87920ccbebf51 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Fri, 24 Nov 2017 15:14:27 -0800 Subject: [PATCH 128/406] bcache: check return value of register_shrinker register_shrinker is now __must_check, so check it to kill a warning. Caller of bch_btree_cache_alloc in super.c appropriately checks return value so this is fully plumbed through. This V2 fixes checkpatch warnings and improves the commit description, as I was too hasty getting the previous version out. Signed-off-by: Michael Lyle Reviewed-by: Vojtech Pavlik Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 11c5503d31dc3..81e8dc3dbe5e3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -807,7 +807,10 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; - register_shrinker(&c->shrink); + + if (register_shrinker(&c->shrink)) + pr_warn("bcache: %s: could not register shrinker", + __func__); return 0; } From c14ca8386539a298c1c19b003fe55e37d0f0e89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= Date: Thu, 23 Nov 2017 13:49:06 +0100 Subject: [PATCH 129/406] crypto: skcipher - Fix skcipher_walk_aead_common The skcipher_walk_aead_common function calls scatterwalk_copychunks on the input and output walks to skip the associated data. If the AD end at an SG list entry boundary, then after these calls the walks will still be pointing to the end of the skipped region. These offsets are later checked for alignment in skcipher_walk_next, so the skcipher_walk may detect the alignment incorrectly. This patch fixes it by calling scatterwalk_done after the copychunks calls to ensure that the offsets refer to the right SG list entry. Fixes: b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") Cc: Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/skcipher.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 4faa0fd53b0c1..6c45ed536664d 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -517,6 +517,9 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2); scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2); + scatterwalk_done(&walk->in, 0, walk->total); + scatterwalk_done(&walk->out, 0, walk->total); + walk->iv = req->iv; walk->oiv = req->iv; From fa6d7cb5d76cf0467c61420fc9238045aedfd379 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Thu, 23 Nov 2017 22:34:31 +0300 Subject: [PATCH 130/406] net: thunderx: Fix TCP/UDP checksum offload for IPv6 pkts Don't offload IP header checksum to NIC. This fixes a previous patch which enabled checksum offloading for both IPv4 and IPv6 packets. So L3 checksum offload was getting enabled for IPv6 pkts. And HW is dropping these pkts as it assumes the pkt is IPv4 when IP csum offload is set in the SQ descriptor. Fixes: 3a9024f52c2e ("net: thunderx: Enable TSO and checksum offloads for ipv6") Signed-off-by: Sunil Goutham Signed-off-by: Aleksey Makarov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index d4496e9afcdf3..8b2c31e2a2b02 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1355,7 +1355,6 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, /* Offload checksum calculation to HW */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - hdr->csum_l3 = 1; /* Enable IP csum calculation */ hdr->l3_offset = skb_network_offset(skb); hdr->l4_offset = skb_transport_offset(skb); From 540c11159dcece5c4a8157a7b39336316085470f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 24 Nov 2017 14:05:36 +0300 Subject: [PATCH 131/406] net: thunderbolt: Stop using zero to mean no valid DMA mapping Commit 86dabda426ac ("net: thunderbolt: Clear finished Tx frame bus address in tbnet_tx_callback()") fixed a DMA-API violation where the driver called dma_unmap_page() in tbnet_free_buffers() for a bus address that might already be unmapped. The fix was to zero out the bus address of a frame in tbnet_tx_callback(). However, as pointed out by David Miller, zero might well be valid mapping (at least in theory) so it is not good idea to use it here. It turns out that we don't need the whole map/unmap dance for Tx buffers at all. Instead we can map the buffers when they are initially allocated and unmap them when the interface is brought down. In between we just DMA sync the buffers for the CPU or device as needed. Signed-off-by: Mika Westerberg Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 57 +++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 228d4aa6d9ae3..ca5e375de27c1 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -335,7 +335,7 @@ static void tbnet_free_buffers(struct tbnet_ring *ring) if (ring->ring->is_tx) { dir = DMA_TO_DEVICE; order = 0; - size = tbnet_frame_size(tf); + size = TBNET_FRAME_SIZE; } else { dir = DMA_FROM_DEVICE; order = TBNET_RX_PAGE_ORDER; @@ -512,6 +512,7 @@ static int tbnet_alloc_rx_buffers(struct tbnet *net, unsigned int nbuffers) static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net) { struct tbnet_ring *ring = &net->tx_ring; + struct device *dma_dev = tb_ring_dma_device(ring->ring); struct tbnet_frame *tf; unsigned int index; @@ -522,7 +523,9 @@ static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net) tf = &ring->frames[index]; tf->frame.size = 0; - tf->frame.buffer_phy = 0; + + dma_sync_single_for_cpu(dma_dev, tf->frame.buffer_phy, + tbnet_frame_size(tf), DMA_TO_DEVICE); return tf; } @@ -531,13 +534,8 @@ static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, bool canceled) { struct tbnet_frame *tf = container_of(frame, typeof(*tf), frame); - struct device *dma_dev = tb_ring_dma_device(ring); struct tbnet *net = netdev_priv(tf->dev); - dma_unmap_page(dma_dev, tf->frame.buffer_phy, tbnet_frame_size(tf), - DMA_TO_DEVICE); - tf->frame.buffer_phy = 0; - /* Return buffer to the ring */ net->tx_ring.prod++; @@ -548,10 +546,12 @@ static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, static int tbnet_alloc_tx_buffers(struct tbnet *net) { struct tbnet_ring *ring = &net->tx_ring; + struct device *dma_dev = tb_ring_dma_device(ring->ring); unsigned int i; for (i = 0; i < TBNET_RING_SIZE; i++) { struct tbnet_frame *tf = &ring->frames[i]; + dma_addr_t dma_addr; tf->page = alloc_page(GFP_KERNEL); if (!tf->page) { @@ -559,7 +559,17 @@ static int tbnet_alloc_tx_buffers(struct tbnet *net) return -ENOMEM; } + dma_addr = dma_map_page(dma_dev, tf->page, 0, TBNET_FRAME_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) { + __free_page(tf->page); + tf->page = NULL; + tbnet_free_buffers(ring); + return -ENOMEM; + } + tf->dev = net->dev; + tf->frame.buffer_phy = dma_addr; tf->frame.callback = tbnet_tx_callback; tf->frame.sof = TBIP_PDF_FRAME_START; tf->frame.eof = TBIP_PDF_FRAME_END; @@ -881,19 +891,6 @@ static int tbnet_stop(struct net_device *dev) return 0; } -static bool tbnet_xmit_map(struct device *dma_dev, struct tbnet_frame *tf) -{ - dma_addr_t dma_addr; - - dma_addr = dma_map_page(dma_dev, tf->page, 0, tbnet_frame_size(tf), - DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, dma_addr)) - return false; - - tf->frame.buffer_phy = dma_addr; - return true; -} - static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, struct tbnet_frame **frames, u32 frame_count) { @@ -908,13 +905,14 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { /* No need to calculate checksum so we just update the - * total frame count and map the frames for DMA. + * total frame count and sync the frames for DMA. */ for (i = 0; i < frame_count; i++) { hdr = page_address(frames[i]->page); hdr->frame_count = cpu_to_le32(frame_count); - if (!tbnet_xmit_map(dma_dev, frames[i])) - goto err_unmap; + dma_sync_single_for_device(dma_dev, + frames[i]->frame.buffer_phy, + tbnet_frame_size(frames[i]), DMA_TO_DEVICE); } return true; @@ -983,21 +981,14 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, *tucso = csum_fold(wsum); /* Checksum is finally calculated and we don't touch the memory - * anymore, so DMA map the frames now. + * anymore, so DMA sync the frames now. */ for (i = 0; i < frame_count; i++) { - if (!tbnet_xmit_map(dma_dev, frames[i])) - goto err_unmap; + dma_sync_single_for_device(dma_dev, frames[i]->frame.buffer_phy, + tbnet_frame_size(frames[i]), DMA_TO_DEVICE); } return true; - -err_unmap: - while (i--) - dma_unmap_page(dma_dev, frames[i]->frame.buffer_phy, - tbnet_frame_size(frames[i]), DMA_TO_DEVICE); - - return false; } static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num, From a60b3f515d30d0fe8537c64671926879a3548103 Mon Sep 17 00:00:00 2001 From: Roman Kapl Date: Fri, 24 Nov 2017 12:27:58 +0100 Subject: [PATCH 132/406] net: sched: crash on blocks with goto chain action tcf_block_put_ext has assumed that all filters (and thus their goto actions) are destroyed in RCU callback and thus can not race with our list iteration. However, that is not true during netns cleanup (see tcf_exts_get_net comment). Prevent the user after free by holding all chains (except 0, that one is already held). foreach_safe is not enough in this case. To reproduce, run the following in a netns and then delete the ns: ip link add dtest type dummy tc qdisc add dev dtest ingress tc filter add dev dtest chain 1 parent ffff: handle 1 prio 1 flower action goto chain 2 Fixes: 822e86d997 ("net_sched: remove tcf_block_put_deferred()") Signed-off-by: Roman Kapl Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7d97f612c9b94..ddcf04b4ab437 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -336,7 +336,8 @@ static void tcf_block_put_final(struct work_struct *work) struct tcf_chain *chain, *tmp; rtnl_lock(); - /* Only chain 0 should be still here. */ + + /* At this point, all the chains should have refcnt == 1. */ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) tcf_chain_put(chain); rtnl_unlock(); @@ -344,15 +345,21 @@ static void tcf_block_put_final(struct work_struct *work) } /* XXX: Standalone actions are not allowed to jump to any chain, and bound - * actions should be all removed after flushing. However, filters are now - * destroyed in tc filter workqueue with RTNL lock, they can not race here. + * actions should be all removed after flushing. */ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain, *tmp; + struct tcf_chain *chain; - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + /* Hold a refcnt for all chains, except 0, so that they don't disappear + * while we are iterating. + */ + list_for_each_entry(chain, &block->chain_list, list) + if (chain->index) + tcf_chain_hold(chain); + + list_for_each_entry(chain, &block->chain_list, list) tcf_chain_flush(chain); tcf_block_offload_unbind(block, q, ei); From afbea2cd253b5198350dfd8edb963567d05827d6 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Fri, 24 Nov 2017 06:25:28 -0800 Subject: [PATCH 133/406] VSOCK: Don't call vsock_stream_has_data in atomic context When using the host personality, VMCI will grab a mutex for any queue pair access. In the detach callback for the vmci vsock transport, we call vsock_stream_has_data while holding a spinlock, and vsock_stream_has_data will access a queue pair. To avoid this, we can simply omit calling vsock_stream_has_data for host side queue pairs, since the QPs are empty per default when the guest has detached. This bug affects users of VMware Workstation using kernel version 4.4 and later. Testing: Ran vsock tests between guest and host, and verified that with this change, the host isn't calling vsock_stream_has_data during detach. Ran mixedTest between guest and host using both guest and host as server. v2: Rebased on top of recent change to sk_state values Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Stefan Hajnoczi Signed-off-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 391775e3575c2..56573dc85709f 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -797,9 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk) /* We should not be sending anymore since the peer won't be * there to receive, but we can still receive if there is data - * left in our consume queue. + * left in our consume queue. If the local endpoint is a host, + * we can't call vsock_stream_has_data, since that may block, + * but a host endpoint can't read data once the VM has + * detached, so there is no available data in that case. */ - if (vsock_stream_has_data(vsk) <= 0) { + if (vsk->local_addr.svm_cid == VMADDR_CID_HOST || + vsock_stream_has_data(vsk) <= 0) { sk->sk_state = TCP_CLOSE; if (sk->sk_state == TCP_SYN_SENT) { @@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); -MODULE_VERSION("1.0.4.0-k"); +MODULE_VERSION("1.0.5.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); From 9e741045faea17e28663f14a45f7f3304827c968 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 24 Nov 2017 11:36:06 -0500 Subject: [PATCH 134/406] net: dsa: fix 'increment on 0' warning Setting the refcount to 0 when allocating a tree to match the number of switch devices it holds may cause an 'increment on 0; use-after-free', if CONFIG_REFCOUNT_FULL is enabled. To fix this, do not decrement the refcount of a newly allocated tree, increment it when an already allocated tree is found, and decrement it after the probing of a switch, as done with the previous behavior. At the same time, make dsa_tree_get and dsa_tree_put accept a NULL argument to simplify callers, and return the tree after incrementation, as most kref users like of_node_get and of_node_put do. Fixes: 8e5bf9759a06 ("net: dsa: simplify tree reference counting") Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 44e3fb7dec8cf..1e287420ff491 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) INIT_LIST_HEAD(&dst->list); list_add_tail(&dsa_tree_list, &dst->list); - /* Initialize the reference counter to the number of switches, not 1 */ kref_init(&dst->refcount); - refcount_set(&dst->refcount.refcount, 0); return dst; } @@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst) kfree(dst); } -static struct dsa_switch_tree *dsa_tree_touch(int index) +static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst) { - struct dsa_switch_tree *dst; - - dst = dsa_tree_find(index); - if (!dst) - dst = dsa_tree_alloc(index); + if (dst) + kref_get(&dst->refcount); return dst; } -static void dsa_tree_get(struct dsa_switch_tree *dst) +static struct dsa_switch_tree *dsa_tree_touch(int index) { - kref_get(&dst->refcount); + struct dsa_switch_tree *dst; + + dst = dsa_tree_find(index); + if (dst) + return dsa_tree_get(dst); + else + return dsa_tree_alloc(index); } static void dsa_tree_release(struct kref *ref) @@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref) static void dsa_tree_put(struct dsa_switch_tree *dst) { - kref_put(&dst->refcount, dsa_tree_release); + if (dst) + kref_put(&dst->refcount, dsa_tree_release); } static bool dsa_port_is_dsa(struct dsa_port *port) @@ -765,6 +767,7 @@ int dsa_register_switch(struct dsa_switch *ds) mutex_lock(&dsa2_mutex); err = dsa_switch_probe(ds); + dsa_tree_put(ds->dst); mutex_unlock(&dsa2_mutex); return err; From 7bbefcfac1936c8d9082a828b09f42a3839cb06e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 24 Nov 2017 12:08:40 -0800 Subject: [PATCH 135/406] uapi: add SPDX identifier to vm_sockets_diag.h New file seems to have missed the SPDX license scan and update. Signed-off-by: Stephen Hemminger Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/uapi/linux/vm_sockets_diag.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/vm_sockets_diag.h b/include/uapi/linux/vm_sockets_diag.h index 14cd7dc5a187c..0b4dd54f3d1ea 100644 --- a/include/uapi/linux/vm_sockets_diag.h +++ b/include/uapi/linux/vm_sockets_diag.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* AF_VSOCK sock_diag(7) interface for querying open sockets */ #ifndef _UAPI__VM_SOCKETS_DIAG_H__ From cf21654b40968609779751b34e7923180968fe5b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:55 -0400 Subject: [PATCH 136/406] drm/amdgpu: Fix SDMA load/unload sequence on HWS disabled mode Fix the SDMA load and unload sequence as suggested by HW document. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 47d1c132ac40b..1e3e9be7d77ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -379,29 +379,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; + unsigned long end_jiffies; uint32_t sdma_base_addr; + uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, + m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl); @@ -574,9 +595,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); return 0; } From d12fb13f23199faa7e536acec1db49068e5a067d Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 1 Nov 2017 19:21:56 -0400 Subject: [PATCH 137/406] drm/amdkfd: Fix SDMA ring buffer size calculation ffs function return the position of the first bit set on 1 based. (bit zero returns 1). Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 4859d263fa2a3..4728fad3fd742 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -202,8 +202,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct cik_sdma_rlc_registers *m; m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << - SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; From 8c946b8988acec785bcf67088b6bd0747f36d2d3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:57 -0400 Subject: [PATCH 138/406] drm/amdkfd: Fix SDMA oversubsription handling SDMA only supports a fixed number of queues. HWS cannot handle oversubscription. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_process_queue_manager.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 2bec902fc9390..a3f1e62c60ba9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -191,6 +191,24 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->queue_count >= + CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + pr_err("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + pr_debug("DQM returned %d for create_queue\n", retval); + print_queue(q); + break; + case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && From c393e9b2d51540b74e18e555df14706098dbf2cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 13 Nov 2017 18:08:48 +0200 Subject: [PATCH 139/406] drm/amdkfd: fix amdkfd use-after-free GP fault Fix GP fault caused by dev_info() reference to a struct device* after the device has been freed (use after free). kfd_chardev_exit() frees the device so 'kfd_device' should not be used after calling kfd_chardev_exit(). Signed-off-by: Randy Dunlap Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 6c5a9cab55ded..f744caeaee049 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "kfd_priv.h" #define KFD_DRIVER_AUTHOR "AMD Inc. and others" @@ -132,7 +133,7 @@ static void __exit kfd_module_exit(void) kfd_process_destroy_wq(); kfd_topology_shutdown(); kfd_chardev_exit(); - dev_info(kfd_device, "Removed module\n"); + pr_info("amdkfd: Removed module\n"); } module_init(kfd_module_init); From b4d085201d86af69cbda2214c6dafc0be240ef9f Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 13 Nov 2017 03:35:27 +0300 Subject: [PATCH 140/406] uapi: fix linux/kfd_ioctl.h userspace compilation errors Consistently use types provided by via to fix the following linux/kfd_ioctl.h userspace compilation errors: /usr/include/linux/kfd_ioctl.h:236:2: error: unknown type name 'uint64_t' uint64_t va_addr; /* to KFD */ /usr/include/linux/kfd_ioctl.h:237:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:238:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/linux/kfd_ioctl.h:243:2: error: unknown type name 'uint64_t' uint64_t tile_config_ptr; /usr/include/linux/kfd_ioctl.h:245:2: error: unknown type name 'uint64_t' uint64_t macro_tile_config_ptr; /usr/include/linux/kfd_ioctl.h:249:2: error: unknown type name 'uint32_t' uint32_t num_tile_configs; /usr/include/linux/kfd_ioctl.h:253:2: error: unknown type name 'uint32_t' uint32_t num_macro_tile_configs; /usr/include/linux/kfd_ioctl.h:255:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:256:2: error: unknown type name 'uint32_t' uint32_t gb_addr_config; /* from KFD */ /usr/include/linux/kfd_ioctl.h:257:2: error: unknown type name 'uint32_t' uint32_t num_banks; /* from KFD */ /usr/include/linux/kfd_ioctl.h:258:2: error: unknown type name 'uint32_t' uint32_t num_ranks; /* from KFD */ Fixes: 6a1c9510694fe ("drm/amdkfd: Adding new IOCTL for scratch memory v2") Fixes: 5d71dbc3a5886 ("drm/amdkfd: Implement image tiling mode support v2") Signed-off-by: Dmitry V. Levin Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 731d0df722e3a..6e80501368aee 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -233,29 +233,29 @@ struct kfd_ioctl_wait_events_args { }; struct kfd_ioctl_set_scratch_backing_va_args { - uint64_t va_addr; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u64 va_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_get_tile_config_args { /* to KFD: pointer to tile array */ - uint64_t tile_config_ptr; + __u64 tile_config_ptr; /* to KFD: pointer to macro tile array */ - uint64_t macro_tile_config_ptr; + __u64 macro_tile_config_ptr; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_tile_configs; + __u32 num_tile_configs; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_macro_tile_configs; + __u32 num_macro_tile_configs; - uint32_t gpu_id; /* to KFD */ - uint32_t gb_addr_config; /* from KFD */ - uint32_t num_banks; /* from KFD */ - uint32_t num_ranks; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 gb_addr_config; /* from KFD */ + __u32 num_banks; /* from KFD */ + __u32 num_ranks; /* from KFD */ /* struct size can be extended later if needed * without breaking ABI compatibility */ From b4b591c87f2b0f4ebaf3a68d4f13873b241aa584 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:21 +0200 Subject: [PATCH 141/406] nvme-rdma: don't suppress send completions The entire completions suppress mechanism is currently broken because the HCA might retry a send operation (due to dropped ack) after the nvme transaction has completed. In order to handle this, we signal all send completions and introduce a separate done handler for async events as they will be handled differently (as they don't include in-capsule data by definition). Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 54 +++++++++++----------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2c597105a6bf3..61511bed8acaf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,7 +77,6 @@ enum nvme_rdma_queue_flags { struct nvme_rdma_queue { struct nvme_rdma_qe *rsp_ring; - atomic_t sig_count; int queue_size; size_t cmnd_capsule_len; struct nvme_rdma_ctrl *ctrl; @@ -510,7 +509,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, queue->cmnd_capsule_len = sizeof(struct nvme_command); queue->queue_size = queue_size; - atomic_set(&queue->sig_count, 0); queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, RDMA_PS_TCP, IB_QPT_RC); @@ -1204,21 +1202,9 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } -/* - * We want to signal completion at least every queue depth/2. This returns the - * largest power of two that is not above half of (queue size + 1) to optimize - * (avoid divisions). - */ -static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) -{ - int limit = 1 << ilog2((queue->queue_size + 1) / 2); - - return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0; -} - static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, - struct ib_send_wr *first, bool flush) + struct ib_send_wr *first) { struct ib_send_wr wr, *bad_wr; int ret; @@ -1227,31 +1213,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, sge->length = sizeof(struct nvme_command), sge->lkey = queue->device->pd->local_dma_lkey; - qe->cqe.done = nvme_rdma_send_done; - wr.next = NULL; wr.wr_cqe = &qe->cqe; wr.sg_list = sge; wr.num_sge = num_sge; wr.opcode = IB_WR_SEND; - wr.send_flags = 0; - - /* - * Unsignalled send completions are another giant desaster in the - * IB Verbs spec: If we don't regularly post signalled sends - * the send queue will fill up and only a QP reset will rescue us. - * Would have been way to obvious to handle this in hardware or - * at least the RDMA stack.. - * - * Always signal the flushes. The magic request used for the flush - * sequencer is not allocated in our driver's tagset and it's - * triggered to be freed by blk_cleanup_queue(). So we need to - * always mark it as signaled to ensure that the "wr_cqe", which is - * embedded in request's payload, is not freed when __ib_process_cq() - * calls wr_cqe->done(). - */ - if (nvme_rdma_queue_sig_limit(queue) || flush) - wr.send_flags |= IB_SEND_SIGNALED; + wr.send_flags = IB_SEND_SIGNALED; if (first) first->next = ≀ @@ -1301,6 +1268,12 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue) return queue->ctrl->tag_set.tags[queue_idx - 1]; } +static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + nvme_rdma_wr_error(cq, wc, "ASYNC"); +} + static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg); @@ -1319,10 +1292,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) cmd->common.flags |= NVME_CMD_SGL_METABUF; nvme_rdma_set_sg_null(cmd); + sqe->cqe.done = nvme_rdma_async_done; + ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE); - ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false); + ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL); WARN_ON_ONCE(ret); } @@ -1607,7 +1582,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_qe *sqe = &req->sqe; struct nvme_command *c = sqe->data; - bool flush = false; struct ib_device *dev; blk_status_t ret; int err; @@ -1636,13 +1610,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, goto err; } + sqe->cqe.done = nvme_rdma_send_done; + ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); - if (req_op(rq) == REQ_OP_FLUSH) - flush = true; err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); + req->mr->need_inval ? &req->reg_wr.wr : NULL); if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; From 4af7f7ff92a42b6c713293c99e7982bcfcf51a70 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:22 +0200 Subject: [PATCH 142/406] nvme-rdma: don't complete requests before a send work request has completed In order to guarantee that the HCA will never get an access violation (either from invalidated rkey or from iommu) when retrying a send operation we must complete a request only when both send completion and the nvme cqe has arrived. We need to set the send/recv completions flags atomically because we might have more than a single context accessing the request concurrently (one is cq irq-poll context and the other is user-polling used in IOCB_HIPRI). Only then we are safe to invalidate the rkey (if needed), unmap the host buffers, and complete the IO. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 61511bed8acaf..502f7c515a991 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -59,6 +59,9 @@ struct nvme_rdma_request { struct nvme_request req; struct ib_mr *mr; struct nvme_rdma_qe sqe; + union nvme_result result; + __le16 status; + refcount_t ref; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; int nents; @@ -1162,6 +1165,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; req->mr->need_inval = false; + refcount_set(&req->ref, 2); /* send and recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1198,8 +1202,19 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { - if (unlikely(wc->status != IB_WC_SUCCESS)) + struct nvme_rdma_qe *qe = + container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); + struct nvme_rdma_request *req = + container_of(qe, struct nvme_rdma_request, sqe); + struct request *rq = blk_mq_rq_from_pdu(req); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "SEND"); + return; + } + + if (refcount_dec_and_test(&req->ref)) + nvme_end_request(rq, req->status, req->result); } static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, @@ -1318,14 +1333,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, } req = blk_mq_rq_to_pdu(rq); - if (rq->tag == tag) - ret = 1; + req->status = cqe->status; + req->result = cqe->result; if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - nvme_end_request(rq, cqe->status, cqe->result); + if (refcount_dec_and_test(&req->ref)) { + if (rq->tag == tag) + ret = 1; + nvme_end_request(rq, req->status, req->result); + } + return ret; } From 2f122e4f5107cf8ab0e592d63ed816a00110b4fe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:23 +0200 Subject: [PATCH 143/406] nvme-rdma: wait for local invalidation before completing a request We must not complete a request before the host memory region is invalidated. Luckily we have send with invalidate protocol support so we usually don't need to execute it, but in case the target did not invalidate a memory region for us, we must wait for the invalidation to complete before unmapping host memory and completing the I/O. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 502f7c515a991..c03460d0ca94b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1019,8 +1019,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc) static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) { - if (unlikely(wc->status != IB_WC_SUCCESS)) + struct nvme_rdma_request *req = + container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe); + struct request *rq = blk_mq_rq_from_pdu(req); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "LOCAL_INV"); + return; + } + + if (refcount_dec_and_test(&req->ref)) + nvme_end_request(rq, req->status, req->result); + } static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, @@ -1031,7 +1041,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, .opcode = IB_WR_LOCAL_INV, .next = NULL, .num_sge = 0, - .send_flags = 0, + .send_flags = IB_SEND_SIGNALED, .ex.invalidate_rkey = req->mr->rkey, }; @@ -1045,24 +1055,12 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; - int res; if (!blk_rq_bytes(rq)) return; - if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) { - res = nvme_rdma_inv_rkey(queue, req); - if (unlikely(res < 0)) { - dev_err(ctrl->ctrl.device, - "Queueing INV WR for rkey %#x failed (%d)\n", - req->mr->rkey, res); - nvme_rdma_error_recovery(queue->ctrl); - } - } - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -1337,8 +1335,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->result = cqe->result; if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && - wc->ex.invalidate_rkey == req->mr->rkey) + wc->ex.invalidate_rkey == req->mr->rkey) { req->mr->need_inval = false; + } else if (req->mr->need_inval) { + ret = nvme_rdma_inv_rkey(queue, req); + if (unlikely(ret < 0)) { + dev_err(queue->ctrl->ctrl.device, + "Queueing INV WR for rkey %#x failed (%d)\n", + req->mr->rkey, ret); + nvme_rdma_error_recovery(queue->ctrl); + } + /* the local invalidation completion will end the request */ + return 0; + } if (refcount_dec_and_test(&req->ref)) { if (rq->tag == tag) From 3ef0279bb0031f67537bd8972899a6a23d3064d7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:24 +0200 Subject: [PATCH 144/406] nvme-rdma: Check remotely invalidated rkey matches our expected rkey If we got a remote invalidation on a bogus rkey, this is a protocol error. Fail the connection in this case. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c03460d0ca94b..3a952d458e7c6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1334,8 +1334,13 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->status = cqe->status; req->result = cqe->result; - if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && - wc->ex.invalidate_rkey == req->mr->rkey) { + if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { + if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) { + dev_err(queue->ctrl->ctrl.device, + "Bogus remote invalidation for rkey %#x\n", + req->mr->rkey); + nvme_rdma_error_recovery(queue->ctrl); + } req->mr->need_inval = false; } else if (req->mr->need_inval) { ret = nvme_rdma_inv_rkey(queue, req); From f41725bbe16b0773302c0cc7dc2e89f54828712d Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Sun, 26 Nov 2017 10:40:55 +0000 Subject: [PATCH 145/406] nvme-rdma: Use mr pool Currently, blk_mq_tagset_iter() iterate over initial hctx tags only. If an I/O scheduler is used, it doesn't iterate the hctx scheduler tags and the static request aren't been updated. For example, while using NVMe over Fabrics RDMA host, this cause us not to reinit the scheduler requests and thus not re-register all the memory regions during the tagset re-initialization in the reconnect flow. This may lead to a memory registration error: "MEMREG for CQE 0xffff88044c14dce8 failed with status memory management operation error (6)" With this commit we don't need to reinit the requests, and thus fix this failure. Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 95 ++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 58 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3a952d458e7c6..02ef0771f6b90 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -260,32 +261,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) return ret; } -static int nvme_rdma_reinit_request(void *data, struct request *rq) -{ - struct nvme_rdma_ctrl *ctrl = data; - struct nvme_rdma_device *dev = ctrl->device; - struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - int ret = 0; - - if (WARN_ON_ONCE(!req->mr)) - return 0; - - ib_dereg_mr(req->mr); - - req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, - ctrl->max_fr_pages); - if (IS_ERR(req->mr)) { - ret = PTR_ERR(req->mr); - req->mr = NULL; - goto out; - } - - req->mr->need_inval = false; - -out: - return ret; -} - static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx) { @@ -295,9 +270,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; struct nvme_rdma_device *dev = queue->device; - if (req->mr) - ib_dereg_mr(req->mr); - nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); } @@ -319,21 +291,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, if (ret) return ret; - req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, - ctrl->max_fr_pages); - if (IS_ERR(req->mr)) { - ret = PTR_ERR(req->mr); - goto out_free_qe; - } - req->queue = queue; return 0; - -out_free_qe: - nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - return -ENOMEM; } static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -433,6 +393,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); + rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -442,6 +404,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) nvme_rdma_dev_put(dev); } +static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) +{ + return min_t(u32, NVME_RDMA_MAX_SEGMENTS, + ibdev->attrs.max_fast_reg_page_list_len); +} + static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) { struct ib_device *ibdev; @@ -484,8 +452,22 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_qp; } + ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, + queue->queue_size, + IB_MR_TYPE_MEM_REG, + nvme_rdma_get_max_fr_pages(ibdev)); + if (ret) { + dev_err(queue->ctrl->ctrl.device, + "failed to initialize MR pool sized %d for QID %d\n", + queue->queue_size, idx); + goto out_destroy_ring; + } + return 0; +out_destroy_ring: + nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, + sizeof(struct nvme_completion), DMA_FROM_DEVICE); out_destroy_qp: rdma_destroy_qp(queue->cm_id); out_destroy_ib_cq: @@ -757,8 +739,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->device = ctrl->queues[0].device; - ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ctrl->device->dev->attrs.max_fast_reg_page_list_len); + ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); @@ -772,10 +753,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = PTR_ERR(ctrl->ctrl.admin_q); goto out_free_tagset; } - } else { - error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); - if (error) - goto out_free_queue; } error = nvme_rdma_start_queue(ctrl, 0); @@ -855,10 +832,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_tag_set; } } else { - ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); - if (ret) - goto out_free_io_queues; - blk_mq_update_nr_hw_queues(&ctrl->tag_set, ctrl->ctrl.queue_count - 1); } @@ -1061,6 +1034,11 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_bytes(rq)) return; + if (req->mr) { + ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + req->mr = NULL; + } + ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -1117,12 +1095,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; int nr; + req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs); + if (WARN_ON_ONCE(!req->mr)) + return -EAGAIN; + /* * Align the MR to a 4K page size to match the ctrl page size and * the block virtual boundary. */ nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); if (unlikely(nr < count)) { + ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + req->mr = NULL; if (nr < 0) return nr; return -EINVAL; @@ -1141,8 +1125,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - req->mr->need_inval = true; - sg->addr = cpu_to_le64(req->mr->iova); put_unaligned_le24(req->mr->length, sg->length); put_unaligned_le32(req->mr->rkey, sg->key); @@ -1162,7 +1144,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; - req->mr->need_inval = false; refcount_set(&req->ref, 2); /* send and recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1341,8 +1322,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->mr->rkey); nvme_rdma_error_recovery(queue->ctrl); } - req->mr->need_inval = false; - } else if (req->mr->need_inval) { + } else if (req->mr) { ret = nvme_rdma_inv_rkey(queue, req); if (unlikely(ret < 0)) { dev_err(queue->ctrl->ctrl.device, @@ -1650,7 +1630,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, sizeof(struct nvme_command), DMA_TO_DEVICE); err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->mr->need_inval ? &req->reg_wr.wr : NULL); + req->mr ? &req->reg_wr.wr : NULL); if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; @@ -1798,7 +1778,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, - .reinit_request = nvme_rdma_reinit_request, }; static inline bool From 2734166e89639c973c6e125ac8bcfc2d9db72b70 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 25 Nov 2017 13:14:40 -0600 Subject: [PATCH 146/406] net: openvswitch: datapath: fix data type in queue_gso_packets gso_type is being used in binary AND operations together with SKB_GSO_UDP. The issue is that variable gso_type is of type unsigned short and SKB_GSO_UDP expands to more than 16 bits: SKB_GSO_UDP = 1 << 16 this makes any binary AND operation between gso_type and SKB_GSO_UDP to be always zero, hence making some code unreachable and likely causing undesired behavior. Fix this by changing the data type of variable gso_type to unsigned int. Addresses-Coverity-ID: 1462223 Fixes: 0c19f846d582 ("net: accept UFO datagrams from tuntap and packet") Signed-off-by: Gustavo A. R. Silva Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 99cfafc2a139b..ef38e5aecd285 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -308,7 +308,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { - unsigned short gso_type = skb_shinfo(skb)->gso_type; + unsigned int gso_type = skb_shinfo(skb)->gso_type; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; From 67c8d22a73128ff910e2287567132530abcf5b71 Mon Sep 17 00:00:00 2001 From: zhangliping Date: Sat, 25 Nov 2017 22:02:12 +0800 Subject: [PATCH 147/406] openvswitch: fix the incorrect flow action alloc size If we want to add a datapath flow, which has more than 500 vxlan outputs' action, we will get the following error reports: openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Actions may not be safe on all matching packets ... ... It seems that we can simply enlarge the MAX_ACTIONS_BUFSIZE to fix it, but this is not the root cause. For example, for a vxlan output action, we need about 60 bytes for the nlattr, but after it is converted to the flow action, it only occupies 24 bytes. This means that we can still support more than 1000 vxlan output actions for a single datapath flow under the the current 32k max limitation. So even if the nla_len(attr) is larger than MAX_ACTIONS_BUFSIZE, we shouldn't report EINVAL and keep it move on, as the judgement can be done by the reserve_sfa_size. Signed-off-by: zhangliping Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dc424798ba6f3..624ea74353dd3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2241,14 +2241,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) +static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); - return ERR_PTR(-EINVAL); - } + WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) @@ -2321,12 +2318,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = ksize(*sfa) * 2; if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + OVS_NLERR(log, "Flow action size exceeds max %u", + MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); + } new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size, log); + acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -3059,7 +3059,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr), log); + *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); From ed81cc612c92167fe7606773e036ee5ccef6c190 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 21 Nov 2017 17:09:59 +0100 Subject: [PATCH 148/406] hwmon: Drop reference to Jean's tree This tree has not been used for over a year, Guenter is taking all the hwmon patches in practice. Signed-off-by: Jean Delvare Cc: Guenter Roeck Signed-off-by: Guenter Roeck --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76d..533fbb03a95c1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6174,7 +6174,6 @@ M: Jean Delvare M: Guenter Roeck L: linux-hwmon@vger.kernel.org W: http://hwmon.wiki.kernel.org/ -T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git S: Maintained F: Documentation/hwmon/ From 517f56839f581618d24f2e67a35738a5c6cbaecb Mon Sep 17 00:00:00 2001 From: Mirza Krak Date: Wed, 15 Nov 2017 08:24:46 +0000 Subject: [PATCH 149/406] drm/rockchip: dw-mipi-dsi: fix possible un-balanced runtime PM enable In the case where the bind gets deferred we would end up with a un-balanced runtime PM enable call. Fix this by simply moving the pm_runtime_enable call to the end of the bind function when all paths have succeeded. Signed-off-by: Mirza Krak Signed-off-by: Sandy Huang Link: https://patchwork.freedesktop.org/patch/msgid/1510734286-37434-1-git-send-email-mirza.krak@endian.se --- drivers/gpu/drm/rockchip/dw-mipi-dsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c index 9a20b9dc27c83..f7fc652b00270 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c @@ -1275,8 +1275,6 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, goto err_pllref; } - pm_runtime_enable(dev); - dsi->dsi_host.ops = &dw_mipi_dsi_host_ops; dsi->dsi_host.dev = dev; ret = mipi_dsi_host_register(&dsi->dsi_host); @@ -1291,6 +1289,7 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, } dev_set_drvdata(dev, dsi); + pm_runtime_enable(dev); return 0; err_mipi_dsi_host: From fa56b3f83177e1ad60815f750dd3816bd4c25677 Mon Sep 17 00:00:00 2001 From: Vitor Massaru Iha Date: Mon, 30 Oct 2017 11:36:54 -0200 Subject: [PATCH 150/406] drm: Fix checkpatch issue: "WARNING: braces {} are not necessary for single statement blocks." Signed-off-by: Vitor Massaru Iha Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 72b22b805412b..5a5427bbd70e4 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -317,9 +317,8 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm) formats, ARRAY_SIZE(formats), NULL, DRM_PLANE_TYPE_PRIMARY, NULL); - if (ret) { + if (ret) return ERR_PTR(ret); - } drm_plane_helper_add(plane, &hdlcd_plane_helper_funcs); hdlcd->plane = plane; From f73e8b82531573a198a4d0e5bff0d3256cbbd1d8 Mon Sep 17 00:00:00 2001 From: Srishti Sharma Date: Fri, 29 Sep 2017 15:30:40 +0530 Subject: [PATCH 151/406] drm/arm: Replace instances of drm_dev_unref with drm_dev_put. Replace drm_dev_unref with drm_dev_put as it is more consistent with kernel coding style. Done using the following semantic patch by coccinelle. @r@ expression e; @@ -drm_dev_unref(); +drm_dev_put(); Signed-off-by: Srishti Sharma [split into hdlcd and malidp specific patches] Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 45b174fea36b2..0afb53b1f4e92 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -355,7 +355,7 @@ static int hdlcd_drm_bind(struct device *dev) err_free: drm_mode_config_cleanup(drm); dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); return ret; } @@ -380,7 +380,7 @@ static void hdlcd_drm_unbind(struct device *dev) pm_runtime_disable(drm->dev); of_reserved_mem_device_release(drm->dev); drm_mode_config_cleanup(drm); - drm_dev_unref(drm); + drm_dev_put(drm); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); } From 7b6ddeaf27eca72795ceeae2f0f347db1b5f9a30 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Nov 2017 14:46:08 +0100 Subject: [PATCH 152/406] mac80211: use QoS NDP for AP probing When connected to a QoS/WMM AP, mac80211 should use a QoS NDP for probing it, instead of a regular non-QoS one, fix this. Change all the drivers to *not* allow QoS NDP for now, even though it looks like most of them should be OK with that. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/channel.c | 2 +- drivers/net/wireless/st/cw1200/sta.c | 4 ++-- drivers/net/wireless/ti/wl1251/main.c | 2 +- drivers/net/wireless/ti/wlcore/cmd.c | 5 ++-- include/net/mac80211.h | 8 ++++++- net/mac80211/mlme.c | 2 +- net/mac80211/tx.c | 29 ++++++++++++++++++++++-- 7 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index dfb26f03c1a27..1b05b5d7a0386 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -1113,7 +1113,7 @@ ath_chanctx_send_vif_ps_frame(struct ath_softc *sc, struct ath_vif *avp, if (!avp->assoc) return false; - skb = ieee80211_nullfunc_get(sc->hw, vif); + skb = ieee80211_nullfunc_get(sc->hw, vif, false); if (!skb) return false; diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index 03687a80d6e98..38678e9a05621 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -198,7 +198,7 @@ void __cw1200_cqm_bssloss_sm(struct cw1200_common *priv, priv->bss_loss_state++; - skb = ieee80211_nullfunc_get(priv->hw, priv->vif); + skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); WARN_ON(!skb); if (skb) cw1200_tx(priv->hw, NULL, skb); @@ -2265,7 +2265,7 @@ static int cw1200_upload_null(struct cw1200_common *priv) .rate = 0xFF, }; - frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif); + frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); if (!frame.skb) return -ENOMEM; diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 9915d83a4a305..6d02c660b4ab7 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -566,7 +566,7 @@ static int wl1251_build_null_data(struct wl1251 *wl) size = sizeof(struct wl12xx_null_data_template); ptr = NULL; } else { - skb = ieee80211_nullfunc_get(wl->hw, wl->vif); + skb = ieee80211_nullfunc_get(wl->hw, wl->vif, false); if (!skb) goto out; size = skb->len; diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index 2bfc12fdc9292..761cf8573a805 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -1069,7 +1069,8 @@ int wl12xx_cmd_build_null_data(struct wl1271 *wl, struct wl12xx_vif *wlvif) ptr = NULL; } else { skb = ieee80211_nullfunc_get(wl->hw, - wl12xx_wlvif_to_vif(wlvif)); + wl12xx_wlvif_to_vif(wlvif), + false); if (!skb) goto out; size = skb->len; @@ -1096,7 +1097,7 @@ int wl12xx_cmd_build_klv_null_data(struct wl1271 *wl, struct sk_buff *skb = NULL; int ret = -ENOMEM; - skb = ieee80211_nullfunc_get(wl->hw, vif); + skb = ieee80211_nullfunc_get(wl->hw, vif, false); if (!skb) goto out; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cc9073e45be90..eec143cca1c0f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4470,18 +4470,24 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, * ieee80211_nullfunc_get - retrieve a nullfunc template * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @qos_ok: QoS NDP is acceptable to the caller, this should be set + * if at all possible * * Creates a Nullfunc template which can, for example, uploaded to * hardware. The template must be updated after association so that correct * BSSID and address is used. * + * If @qos_ndp is set and the association is to an AP with QoS/WMM, the + * returned packet will be QoS NDP. + * * Note: Caller (or hardware) is responsible for setting the * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields. * * Return: The nullfunc template. %NULL on error. */ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + bool qos_ok); /** * ieee80211_probereq_get - retrieve a Probe Request template diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 04460440d7314..c244691deab9c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -895,7 +895,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); if (!skb) return; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7b8154474b9e6..3160954fc4060 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4438,13 +4438,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, EXPORT_SYMBOL(ieee80211_pspoll_get); struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + bool qos_ok) { struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_managed *ifmgd; struct ieee80211_local *local; struct sk_buff *skb; + bool qos = false; if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return NULL; @@ -4453,7 +4455,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, ifmgd = &sdata->u.mgd; local = sdata->local; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (qos_ok) { + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, ifmgd->bssid); + qos = sta && sta->sta.wme; + rcu_read_unlock(); + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*nullfunc) + 2); if (!skb) return NULL; @@ -4463,6 +4475,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_TODS); + if (qos) { + __le16 qos = cpu_to_le16(7); + + BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC | + IEEE80211_STYPE_NULLFUNC) != + IEEE80211_STYPE_QOS_NULLFUNC); + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC); + skb->priority = 7; + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + skb_put_data(skb, &qos, sizeof(qos)); + } + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); From fbbdad5edf0bb59786a51b94a9d006bc8c2da9a2 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 14 Nov 2017 23:20:05 +0800 Subject: [PATCH 153/406] mac80211: fix the update of path metric for RANN frame The previous path metric update from RANN frame has not considered the own link metric toward the transmitting mesh STA. Fix this. Reported-by: Michael65535 Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 4f7826d7b47cd..4394463a0c2e6 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath; u8 ttl, flags, hopcount; const u8 *orig_addr; - u32 orig_sn, metric, metric_txsta, interval; + u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval; bool root_is_gate; ttl = rann->rann_ttl; @@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, interval = le32_to_cpu(rann->rann_interval); hopcount = rann->rann_hopcount; hopcount++; - metric = le32_to_cpu(rann->rann_metric); + orig_metric = le32_to_cpu(rann->rann_metric); /* Ignore our own RANNs */ if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, return; } - metric_txsta = airtime_link_metric_get(local, sta); + last_hop_metric = airtime_link_metric_get(local, sta); + new_metric = orig_metric + last_hop_metric; + if (new_metric < orig_metric) + new_metric = MAX_METRIC; mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { @@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if (!(SN_LT(mpath->sn, orig_sn)) && - !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { + !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) { rcu_read_unlock(); return; } @@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } mpath->sn = orig_sn; - mpath->rann_metric = metric + metric_txsta; + mpath->rann_metric = new_metric; mpath->is_root = true; /* Recording RANNs sender address to send individually * addressed PREQs destined for root mesh STA */ @@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, orig_sn, 0, NULL, 0, broadcast_addr, hopcount, ttl, interval, - metric + metric_txsta, 0, sdata); + new_metric, 0, sdata); } rcu_read_unlock(); From 72e2c3438ba3bd2ed640b6b5ea9e58993dd9ab7f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 29 Oct 2017 11:51:09 +0200 Subject: [PATCH 154/406] mac80211: tear down RX aggregations first When doing HW restart we tear down aggregations. Since at this point we are not TX'ing any aggregation, while the peer is still sending RX aggregation over the air, it will make sense to tear down the RX aggregations first. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 41f5e48f80219..167f83b853e6b 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -292,7 +292,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - ___ieee80211_stop_tx_ba_session(sta, i, reason); ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_LEAVE_QBSS, reason != AGG_STOP_DESTROY_STA && @@ -300,6 +299,9 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, } mutex_unlock(&sta->ampdu_mlme.mtx); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + ___ieee80211_stop_tx_ba_session(sta, i, reason); + /* stopping might queue the work again - so cancel only afterwards */ cancel_work_sync(&sta->ampdu_mlme.work); From 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Nov 2017 11:22:42 +0000 Subject: [PATCH 155/406] ARM: avoid faulting on qemu When qemu starts a kernel in a bare environment, the default SCR has the AW and FW bits clear, which means that the kernel can't modify the PSR A or PSR F bits, and means that FIQs and imprecise aborts are always masked. When running uboot under qemu, the AW and FW SCR bits are set, and the kernel functions normally - and this is how real hardware behaves. Fix this for qemu by ignoring the FIQ bit. Fixes: 8bafae202c82 ("ARM: BUG if jumping to usermode address in kernel mode") Signed-off-by: Russell King --- arch/arm/kernel/entry-header.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 75f7a4e8541a9..e056c9a9aa9db 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -299,7 +299,7 @@ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @@ -331,7 +331,7 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc From b77000ed558daa3bef0899d29bf171b8c9b5e6a8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Nov 2017 16:20:52 -0500 Subject: [PATCH 156/406] btrfs: fix deadlock when writing out space cache If we fail to prepare our pages for whatever reason (out of memory in our case) we need to make sure to drop the block_group->data_rwsem, otherwise hilarity ensues. Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: David Sterba [ add label and use existing unlocking code ] Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cdc9f4015ec36..4426d1c73e50f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1264,7 +1264,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Lock all pages first so we can lock the extent safely. */ ret = io_ctl_prepare_pages(io_ctl, inode, 0); if (ret) - goto out; + goto out_unlock; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state); @@ -1358,6 +1358,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, out_nospc: cleanup_write_cache_enospc(inode, io_ctl, &cached_state); +out_unlock: if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) up_write(&block_group->data_rwsem); From 3aa623da789ff6edf789b5655debf33f50b3a9b2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:32 +0800 Subject: [PATCH 157/406] sctp: use sizeof(__u16) for each stream number length instead of magic number Now in stream reconf part there are still some places using magic number 2 for each stream number length. To make it more readable, this patch is to replace them with sizeof(__u16). Reported-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index a11db21dc8a0e..09c797a10aaa1 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -563,7 +563,7 @@ struct sctp_chunk *sctp_process_strreset_outreq( flags = SCTP_STREAM_RESET_INCOMING_SSN; } - nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; + nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); if (nums) { str_p = outreq->list_of_streams; for (i = 0; i < nums; i++) { @@ -627,7 +627,7 @@ struct sctp_chunk *sctp_process_strreset_inreq( goto out; } - nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2; + nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16); str_p = inreq->list_of_streams; for (i = 0; i < nums; i++) { if (ntohs(str_p[i]) >= stream->outcnt) { @@ -927,7 +927,8 @@ struct sctp_chunk *sctp_process_strreset_resp( outreq = (struct sctp_strreset_outreq *)req; str_p = outreq->list_of_streams; - nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; + nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / + sizeof(__u16); if (result == SCTP_STRRESET_PERFORMED) { if (nums) { @@ -956,7 +957,8 @@ struct sctp_chunk *sctp_process_strreset_resp( inreq = (struct sctp_strreset_inreq *)req; str_p = inreq->list_of_streams; - nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; + nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / + sizeof(__u16); *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); From d570a59c5b5f8fb3b65fa7b15ddb205a1d55f8d0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:33 +0800 Subject: [PATCH 158/406] sctp: only allow the out stream reset when the stream outq is empty Now the out stream reset in sctp stream reconf could be done even if the stream outq is not empty. It means that users can not be sure since which msg the new ssn will be used. To make this more synchronous, it shouldn't allow to do out stream reset until these chunks in unsent outq all are sent out. This patch checks the corresponding stream outqs when sending and processing the request . If any of them has unsent chunks in outq, it will return -EAGAIN instead or send SCTP_STRRESET_IN_PROGRESS back to the sender. Fixes: 7f9d68ac944e ("sctp: implement sender-side procedures for SSN Reset Request Parameter") Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 09c797a10aaa1..b20903712d672 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -254,6 +254,30 @@ static int sctp_send_reconf(struct sctp_association *asoc, return retval; } +static bool sctp_stream_outq_is_empty(struct sctp_stream *stream, + __u16 str_nums, __be16 *str_list) +{ + struct sctp_association *asoc; + __u16 i; + + asoc = container_of(stream, struct sctp_association, stream); + if (!asoc->outqueue.out_qlen) + return true; + + if (!str_nums) + return false; + + for (i = 0; i < str_nums; i++) { + __u16 sid = ntohs(str_list[i]); + + if (stream->out[sid].ext && + !list_empty(&stream->out[sid].ext->outq)) + return false; + } + + return true; +} + int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params) { @@ -317,6 +341,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc, for (i = 0; i < str_nums; i++) nstr_list[i] = htons(str_list[i]); + if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) { + retval = -EAGAIN; + goto out; + } + chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); kfree(nstr_list); @@ -636,6 +665,12 @@ struct sctp_chunk *sctp_process_strreset_inreq( } } + if (!sctp_stream_outq_is_empty(stream, nums, str_p)) { + result = SCTP_STRRESET_IN_PROGRESS; + asoc->strreset_inseq--; + goto err; + } + chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); if (!chunk) goto out; From 5c6144a0eb5366ae07fc5059301b139338f39bbd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:34 +0800 Subject: [PATCH 159/406] sctp: only allow the asoc reset when the asoc outq is empty As it says in rfc6525#section5.1.4, before sending the request, C2: The sender has either no outstanding TSNs or considers all outstanding TSNs abandoned. Prior to this patch, it tried to consider all outstanding TSNs abandoned by dropping all chunks in all outqs with sctp_outq_free (even including sacked, retransmit and transmitted queues) when doing this reset, which is too aggressive. To make it work gently, this patch will only allow the asoc reset when the sender has no outstanding TSNs by checking if unsent, transmitted and retransmit are all empty with sctp_outq_is_empty before sending and processing the request. Fixes: 692787cef651 ("sctp: implement receiver-side procedures for the SSN/TSN Reset Request Parameter") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index b20903712d672..f3b7d2779c181 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -406,6 +406,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) if (asoc->strreset_outstanding) return -EINPROGRESS; + if (!sctp_outq_is_empty(&asoc->outqueue)) + return -EAGAIN; + chunk = sctp_make_strreset_tsnreq(asoc); if (!chunk) return -ENOMEM; @@ -728,6 +731,12 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( } goto err; } + + if (!sctp_outq_is_empty(&asoc->outqueue)) { + result = SCTP_STRRESET_IN_PROGRESS; + goto err; + } + asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) From 159f2a7456c6ae95c1e1a58e8b8ec65ef12d51cf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:35 +0800 Subject: [PATCH 160/406] sctp: avoid flushing unsent queue when doing asoc reset Now when doing asoc reset, it cleans up sacked and abandoned queues by calling sctp_outq_free where it also cleans up unsent, retransmit and transmitted queues. It's safe for the sender of response, as these 3 queues are empty at that time. But when the receiver of response is doing the reset, the users may already enqueue some chunks into unsent during the time waiting the response, and these chunks should not be flushed. To void the chunks in it would be removed, it moves the queue into a temp list, then gets it back after sctp_outq_free is done. The patch also fixes some incorrect comments in sctp_process_strreset_tsnreq. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index f3b7d2779c181..9dd5bfe3860c3 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -747,9 +747,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( goto out; } - /* G3: The same processing as though a SACK chunk with no gap report - * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were - * received MUST be performed. + /* G4: The same processing as though a FWD-TSN chunk (as defined in + * [RFC3758]) with all streams affected and a new cumulative TSN + * ACK of the Receiver's Next TSN minus 1 were received MUST be + * performed. */ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); @@ -764,10 +765,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); - /* G4: The same processing as though a FWD-TSN chunk (as defined in - * [RFC3758]) with all streams affected and a new cumulative TSN - * ACK of the Receiver's Next TSN minus 1 were received MUST be - * performed. + /* G3: The same processing as though a SACK chunk with no gap report + * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were + * received MUST be performed. */ sctp_outq_free(&asoc->outqueue); @@ -1021,6 +1021,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (result == SCTP_STRRESET_PERFORMED) { __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( &asoc->peer.tsn_map); + LIST_HEAD(temp); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); @@ -1029,7 +1030,13 @@ struct sctp_chunk *sctp_process_strreset_resp( SCTP_TSN_MAP_INITIAL, stsn, GFP_ATOMIC); + /* Clean up sacked and abandoned queues only. As the + * out_chunk_list may not be empty, splice it to temp, + * then get it back after sctp_outq_free is done. + */ + list_splice_init(&asoc->outqueue.out_chunk_list, &temp); sctp_outq_free(&asoc->outqueue); + list_splice_init(&temp, &asoc->outqueue.out_chunk_list); asoc->next_tsn = rtsn; asoc->ctsn_ack_point = asoc->next_tsn - 1; From 52a395896a051a3d5c34fba67c324f69ec5e67c6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:36 +0800 Subject: [PATCH 161/406] sctp: set sender next_tsn for the old result with ctsn_ack_point plus 1 When doing asoc reset, if the sender of the response has already sent some chunk and increased asoc->next_tsn before the duplicate request comes, the response will use the old result with an incorrect sender next_tsn. Better than asoc->next_tsn, asoc->ctsn_ack_point can't be changed after the sender of the response has performed the asoc reset and before the peer has confirmed it, and it's value is still asoc->next_tsn original value minus 1. This patch sets sender next_tsn for the old result with ctsn_ack_point plus 1 when processing the duplicate request, to make sure the sender next_tsn value peer gets will be always right. Fixes: 692787cef651 ("sctp: implement receiver-side procedures for the SSN/TSN Reset Request Parameter") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 9dd5bfe3860c3..a20145b3a9494 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -725,7 +725,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; if (result == SCTP_STRRESET_PERFORMED) { - next_tsn = asoc->next_tsn; + next_tsn = asoc->ctsn_ack_point + 1; init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; } From b7e5a591502b03b4a1408bb93a15968d6ea446ce Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 20 Nov 2017 10:33:09 -0800 Subject: [PATCH 162/406] RISC-V: Remove __vdso_cmpxchg{32,64} symbol versions These were left over from an earlier version of the port. Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vdso.lds.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8c9dce95c11d4..3ac08eebd11df 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,8 +70,6 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; - __vdso_cmpxchg32; - __vdso_cmpxchg64; local: *; }; } From 28dfbe6ed483e8a589cce88095d7787d61bf9c16 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 Oct 2017 15:42:14 -0700 Subject: [PATCH 163/406] RISC-V: Add VDSO entries for clock_get/gettimeofday/getcpu For now these are just placeholders that execute the syscall. We will later optimize them to avoid kernel crossings, but we'd like to have the VDSO entries from the first released kernel version to make the ABI simpler. Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 6 +++++- arch/riscv/kernel/vdso/clock_getres.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/clock_gettime.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/getcpu.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/gettimeofday.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/vdso.lds.S | 4 ++++ 6 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/kernel/vdso/clock_getres.S create mode 100644 arch/riscv/kernel/vdso/clock_gettime.S create mode 100644 arch/riscv/kernel/vdso/getcpu.S create mode 100644 arch/riscv/kernel/vdso/gettimeofday.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 523d0a8ac8db7..2dcc4f3070bc7 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,7 +1,11 @@ # Copied from arch/tile/kernel/vdso/Makefile # Symbols present in the vdso -vdso-syms = rt_sigreturn +vdso-syms = rt_sigreturn +vdso-syms += gettimeofday +vdso-syms += clock_gettime +vdso-syms += clock_getres +vdso-syms += getcpu # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S new file mode 100644 index 0000000000000..edf7e2339648a --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_getres.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__vdso_clock_getres) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_getres + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_getres) diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S new file mode 100644 index 0000000000000..aac65676c6d55 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_gettime.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__vdso_clock_gettime) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_gettime + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_gettime) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S new file mode 100644 index 0000000000000..cc7e98924484a --- /dev/null +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ +ENTRY(__vdso_getcpu) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_getcpu + ecall + ret + .cfi_endproc +ENDPROC(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S new file mode 100644 index 0000000000000..da85d33e8990c --- /dev/null +++ b/arch/riscv/kernel/vdso/gettimeofday.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__vdso_gettimeofday) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_gettimeofday + ecall + ret + .cfi_endproc +ENDPROC(__vdso_gettimeofday) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 3ac08eebd11df..c7543c6a00f9c 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,6 +70,10 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; + __vdso_getcpu; local: *; }; } From eae8d82529dd9820e206ecba0047b806c4410e65 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 6 Nov 2017 10:43:18 +0800 Subject: [PATCH 164/406] btrfs: Fix wild memory access in compression level parser [BUG] Kernel panic when mounting with "-o compress" mount option. KASAN will report like: ------ ================================================================== BUG: KASAN: wild-memory-access in strncmp+0x31/0xc0 Read of size 1 at addr d86735fce994f800 by task mount/662 ... Call Trace: dump_stack+0xe3/0x175 kasan_report+0x163/0x370 __asan_load1+0x47/0x50 strncmp+0x31/0xc0 btrfs_compress_str2level+0x20/0x70 [btrfs] btrfs_parse_options+0xff4/0x1870 [btrfs] open_ctree+0x2679/0x49f0 [btrfs] btrfs_mount+0x1b7f/0x1d30 [btrfs] mount_fs+0x49/0x190 vfs_kern_mount.part.29+0xba/0x280 vfs_kern_mount+0x13/0x20 btrfs_mount+0x31e/0x1d30 [btrfs] mount_fs+0x49/0x190 vfs_kern_mount.part.29+0xba/0x280 do_mount+0xaad/0x1a00 SyS_mount+0x98/0xe0 entry_SYSCALL_64_fastpath+0x1f/0xbe ------ [Cause] For 'compress' and 'compress_force' options, its token doesn't expect any parameter so its args[0] contains uninitialized data. Accessing args[0] will cause above wild memory access. [Fix] For Opt_compress and Opt_compress_force, set compression level to the default. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba [ set the default in advance ] Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- fs/btrfs/compression.h | 2 ++ fs/btrfs/super.c | 13 +++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 4a78e57263373..5982c8a71f02f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1529,5 +1529,5 @@ unsigned int btrfs_compress_str2level(const char *str) if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0) return str[5] - '0'; - return 0; + return BTRFS_ZLIB_DEFAULT_LEVEL; } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 93c5b82ae97ec..0868cc554f145 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -34,6 +34,8 @@ /* Maximum size of data before compression */ #define BTRFS_MAX_UNCOMPRESSED (SZ_128K) +#define BTRFS_ZLIB_DEFAULT_LEVEL 3 + struct compressed_bio { /* number of bios pending for this compressed extent */ refcount_t pending_bios; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 65af029559b58..ff3545e526f5d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -507,9 +507,18 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { compress_type = "zlib"; + info->compress_type = BTRFS_COMPRESS_ZLIB; - info->compress_level = - btrfs_compress_str2level(args[0].from); + info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; + /* + * args[0] contains uninitialized data since + * for these tokens we don't expect any + * parameter. + */ + if (token != Opt_compress && + token != Opt_compress_force) + info->compress_level = + btrfs_compress_str2level(args[0].from); btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); From 9f97df50c52c2887432debb6238f4e43567386a5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 11:20:15 -0400 Subject: [PATCH 165/406] reiserfs: remove unneeded i_version bump The i_version field in reiserfs is not initialized and is only ever updated here. Nothing ever views it, so just remove it. Signed-off-by: Jeff Layton Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5464ec517702f..4885c7b6e44fb 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2591,7 +2591,6 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; From e872fa94662d0644057c7c80b3071bdb9249e5ab Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:49 +0000 Subject: [PATCH 166/406] KVM: lapic: Split out x2apic ldr calculation Split out the ldr calculation from kvm_apic_set_x2apic_id since we're about to reuse it in the following patch. Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 943acbf00c69d..e2edb11030025 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } +static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) +{ + return ((id >> 4) << 16) | (1 << (id & 0xf)); +} + static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + u32 ldr = kvm_apic_calc_x2apic_ldr(id); WARN_ON_ONCE(id != apic->vcpu->vcpu_id); From 12806ba937382fdfdbad62a399aa2dce65c10fcd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:50 +0000 Subject: [PATCH 167/406] KVM: lapic: Fixup LDR on load in x2apic In x2apic mode the LDR is fixed based on the ID rather than separately loadable like it was before x2. When kvm_apic_set_state is called, the base is set, and if it has the X2APIC_ENABLE flag set then the LDR is calculated; however that value gets overwritten by the memcpy a few lines below overwriting it with the value that came from userland. The symptom is a lack of EOI after loading the state (e.g. after a QEMU migration) and is due to the EOI bitmap being wrong due to the incorrect LDR. This was seen with a Win2016 guest under Qemu with irqchip=split whose USB mouse didn't work after a VM migration. This corresponds to RH bug: https://bugzilla.redhat.com/show_bug.cgi?id=1502591 Reported-by: Yiqian Wei Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org [Applied fixup from Liran Alon. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e2edb11030025..e2c1fb8d35cea 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2250,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, { if (apic_x2apic_mode(vcpu->arch.apic)) { u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); if (vcpu->kvm->arch.x2apic_format) { if (*id != vcpu->vcpu_id) @@ -2260,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, else *id <<= 24; } + + /* In x2APIC mode, the LDR is fixed and based on the id */ + if (set) + *ldr = kvm_apic_calc_x2apic_ldr(*id); } return 0; From e70b57a6ce4e8b92a56a615ae79bdb2bd66035e7 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:55:05 -0800 Subject: [PATCH 168/406] KVM: X86: Fix softlockup when get the current kvmclock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [qemu-system-x86:10185] CPU: 6 PID: 10185 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc4+ #4 RIP: 0010:kvm_get_time_scale+0x4e/0xa0 [kvm] Call Trace: get_time_ref_counter+0x5a/0x80 [kvm] kvm_hv_process_stimers+0x120/0x5f0 [kvm] kvm_arch_vcpu_ioctl_run+0x4b4/0x1690 [kvm] kvm_vcpu_ioctl+0x33a/0x620 [kvm] do_vfs_ioctl+0xa1/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1e/0xa9 This can be reproduced when running kvm-unit-tests/hyperv_stimer.flat and cpu-hotplug stress simultaneously. __this_cpu_read(cpu_tsc_khz) returns 0 (set in kvmclock_cpu_down_prep()) when the pCPU is unhotplug which results in kvm_get_time_scale() gets into an infinite loop. This patch fixes it by treating the unhotplug pCPU as not using master clock. Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4552427105f69..f49fe514d1b2e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1798,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); From c37c28730bb031cc8a44a130c2555c0f3efbe2d0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:52:21 -0800 Subject: [PATCH 169/406] KVM: VMX: Fix rflags cache during vCPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: *** Guest State *** CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 CR3 = 0x000000002081e000 RSP = 0x000000000000fffa RIP = 0x0000000000000000 RFLAGS=0x00023000 DR7 = 0x00000000000000 ^^^^^^^^^^ ------------[ cut here ]------------ WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 Call Trace: kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The failed vmentry is triggered by the following beautified testcase: #include #include #include #include #include #include #include long r[5]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); struct kvm_guest_debug debug = { .control = 0xf0403, .arch = { .debugreg[6] = 0x2, .debugreg[7] = 0x2 } }; ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); ioctl(r[4], KVM_RUN, 0); } which testcase tries to setup the processor specific debug registers and configure vCPU for handling guest debug events through KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set rflags in order to set TF bit if single step is needed. All regs' caches are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU reset. However, the cache of rflags is not reset during vCPU reset. The function vmx_get_rflags() returns an unreset rflags cache value since the cache is marked avail, it is 0 after boot. Vmentry fails if the rflags reserved bit 1 is 0. This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and its cache to 0x2 during vCPU reset. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0d59dbe430c82..04f19b0996177 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5602,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); From b74558259c5149e5edd79348b70eb34177cbeea0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 22 Nov 2017 14:04:00 -0800 Subject: [PATCH 170/406] KVM: VMX: Fix vmx->nested freeing when no SMI handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 2939 at arch/x86/kvm/vmx.c:3844 free_loaded_vmcs+0x77/0x80 [kvm_intel] CPU: 5 PID: 2939 Comm: repro Not tainted 4.14.0+ #26 RIP: 0010:free_loaded_vmcs+0x77/0x80 [kvm_intel] Call Trace: vmx_free_vcpu+0xda/0x130 [kvm_intel] kvm_arch_destroy_vm+0x192/0x290 [kvm] kvm_put_kvm+0x262/0x560 [kvm] kvm_vm_release+0x2c/0x30 [kvm] __fput+0x190/0x370 task_work_run+0xa1/0xd0 do_exit+0x4d2/0x13e0 do_group_exit+0x89/0x140 get_signal+0x318/0xb80 do_signal+0x8c/0xb40 exit_to_usermode_loop+0xe4/0x140 syscall_return_slowpath+0x206/0x230 entry_SYSCALL_64_fastpath+0x98/0x9a The syzkaller testcase will execute VMXON/VMLAUCH instructions, so the vmx->nested stuff is populated, it will also issue KVM_SMI ioctl. However, the testcase is just a simple c program and not be lauched by something like seabios which implements smi_handler. Commit 05cade71cf (KVM: nSVM: fix SMI injection in guest mode) gets out of guest mode and set nested.vmxon to false for the duration of SMM according to SDM 34.14.1 "leave VMX operation" upon entering SMM. We can't alloc/free the vmx->nested stuff each time when entering/exiting SMM since it will induce more overhead. So the function vmx_pre_enter_smm() marks nested.vmxon false even if vmx->nested stuff is still populated. What it expected is em_rsm() can mark nested.vmxon to be true again. However, the smi_handler/rsm will not execute since there is no something like seabios in this scenario. The function free_nested() fails to free the vmx->nested stuff since the vmx->nested.vmxon is false which results in the above warning. This patch fixes it by also considering the no SMI handler case, luckily vmx->nested.smm.vmxon is marked according to the value of vmx->nested.vmxon in vmx_pre_enter_smm(), we can take advantage of it and free vmx->nested stuff when L1 goes down. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Reviewed-by: Liran Alon Fixes: 05cade71cf (KVM: nSVM: fix SMI injection in guest mode) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 04f19b0996177..4704aaf6d19e2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7415,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) */ static void free_nested(struct vcpu_vmx *vmx) { - if (!vmx->nested.vmxon) + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; vmx->nested.vmxon = false; + vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; From ebb70442cdd4872260c2415929c456be3562da82 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 21 Nov 2017 14:35:40 -0700 Subject: [PATCH 171/406] Btrfs: fix list_add corruption and soft lockups in fsync Xfstests btrfs/146 revealed this corruption, [ 58.138831] Buffer I/O error on dev dm-0, logical block 2621424, async page read [ 58.151233] BTRFS error (device sdf): bdev /dev/mapper/error-test errs: wr 1, rd 0, flush 0, corrupt 0, gen 0 [ 58.152403] list_add corruption. prev->next should be next (ffff88005e6775d8), but was ffffc9000189be88. (prev=ffffc9000189be88). [ 58.153518] ------------[ cut here ]------------ [ 58.153892] WARNING: CPU: 1 PID: 1287 at lib/list_debug.c:31 __list_add_valid+0x169/0x1f0 ... [ 58.157379] RIP: 0010:__list_add_valid+0x169/0x1f0 ... [ 58.161956] Call Trace: [ 58.162264] btrfs_log_inode_parent+0x5bd/0xfb0 [btrfs] [ 58.163583] btrfs_log_dentry_safe+0x60/0x80 [btrfs] [ 58.164003] btrfs_sync_file+0x4c2/0x6f0 [btrfs] [ 58.164393] vfs_fsync_range+0x5f/0xd0 [ 58.164898] do_fsync+0x5a/0x90 [ 58.165170] SyS_fsync+0x10/0x20 [ 58.165395] entry_SYSCALL_64_fastpath+0x1f/0xbe ... It turns out that we could record btrfs_log_ctx:io_err in log_one_extents when IO fails, but make log_one_extents() return '0' instead of -EIO, so the IO error is not acknowledged by the callers, i.e. btrfs_log_inode_parent(), which would remove btrfs_log_ctx:list from list head 'root->log_ctxs'. Since btrfs_log_ctx is allocated from stack memory, it'd get freed with a object alive on the list. then a future list_add will throw the above warning. This returns the correct error in the above case. Jeff also reported this while testing against his fsync error patch set[1]. [1]: https://www.spinics.net/lists/linux-btrfs/msg65308.html "btrfs list corruption and soft lockups while testing writeback error handling" Fixes: 8407f553268a4611f254 ("Btrfs: fix data corruption after fast fsync and writeback error") Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 5 +++-- fs/btrfs/tree-log.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2de7b4f4ca3c7..eb1bac7c8553c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2057,6 +2057,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); + btrfs_init_log_ctx(&ctx, inode); + /* * We write the dirty pages in the range and wait until they complete * out of the ->i_mutex. If so, we can flush the dirty pages by @@ -2203,8 +2205,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } trans->sync = true; - btrfs_init_log_ctx(&ctx, inode); - ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ @@ -2262,6 +2262,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = btrfs_end_transaction(trans); } out: + ASSERT(list_empty(&ctx.list)); err = file_check_and_advance_wb_err(file); if (!ret) ret = err; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index aa7c71cff575a..7bf9b31561db1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4102,7 +4102,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, if (ordered_io_err) { ctx->io_err = -EIO; - return 0; + return ctx->io_err; } btrfs_init_map_token(&token); From 20b7035c66bacc909ae3ffe92c1a1ea7db99fe4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Fri, 24 Nov 2017 22:39:01 +0100 Subject: [PATCH 172/406] KVM: Let KVM_SET_SIGNAL_MASK work as advertised MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM API says for the signal mask you set via KVM_SET_SIGNAL_MASK, that "any unblocked signal received [...] will cause KVM_RUN to return with -EINTR" and that "the signal will only be delivered if not blocked by the original signal mask". This, however, is only true, when the calling task has a signal handler registered for a signal. If not, signal evaluation is short-circuited for SIG_IGN and SIG_DFL, and the signal is either ignored without KVM_RUN returning or the whole process is terminated. Make KVM_SET_SIGNAL_MASK behave as advertised by utilizing logic similar to that in do_sigtimedwait() to avoid short-circuiting of signals. Signed-off-by: Jan H. Schönherr Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 7 ++----- arch/powerpc/kvm/powerpc.c | 7 ++----- arch/s390/kvm/kvm-s390.c | 7 ++----- arch/x86/kvm/x86.c | 7 ++----- include/linux/kvm_host.h | 3 +++ virt/kvm/arm/arm.c | 8 +++----- virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++ 7 files changed, 37 insertions(+), 25 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d535edc014341..75fdeaa8c62f2 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r = -EINTR; - sigset_t sigsaved; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) @@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_enable(); out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6b6c53c42ac94..1915e86cef6f8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1407,7 +1407,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; - sigset_t sigsaved; if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; @@ -1448,16 +1447,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (run->immediate_exit) r = -EINTR; else r = kvmppc_vcpu_run(run, vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98ad8b9e03609..9614aea5839b6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3372,7 +3372,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int rc; - sigset_t sigsaved; if (kvm_run->immediate_exit) return -EINTR; @@ -3382,8 +3381,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { kvm_s390_vcpu_start(vcpu); @@ -3417,8 +3415,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); vcpu->stat.exit_userspace++; return rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f49fe514d1b2e..eee8e7faf1af5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7267,12 +7267,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct fpu *fpu = ¤t->thread.fpu; int r; - sigset_t sigsaved; fpu__initialize(fpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { @@ -7315,8 +7313,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: post_kvm_run_save(vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2e754b7c282c8..893d6d606cd0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,9 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_sigset_activate(struct kvm_vcpu *vcpu); +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); + void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a6524ff27de49..a67c106d73f5c 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -615,7 +615,6 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; - sigset_t sigsaved; if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; @@ -633,8 +632,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (run->immediate_exit) return -EINTR; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); ret = 1; run->exit_reason = KVM_EXIT_UNKNOWN; @@ -769,8 +767,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_update_run(vcpu); } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); + return ret; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2dd1a9ca45998..c01cff064ec5a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2065,6 +2065,29 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +void kvm_sigset_activate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + /* + * This does a lockless modification of ->real_blocked, which is fine + * because, only current can change ->real_blocked and all readers of + * ->real_blocked don't care as long ->real_blocked is always a subset + * of ->blocked. + */ + sigprocmask(SIG_SETMASK, &vcpu->sigset, ¤t->real_blocked); +} + +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + sigprocmask(SIG_SETMASK, ¤t->real_blocked, NULL); + sigemptyset(¤t->real_blocked); +} + static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { unsigned int old, val, grow; From 98c4f78dcdd8cec112d1cbc5e9a792ee6e5ab7a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Nov 2017 12:21:07 -0800 Subject: [PATCH 173/406] xfs: always free inline data before resetting inode fork during ifree In xfs_ifree, we reset the data/attr forks to extents format without bothering to free any inline data buffer that might still be around after all the blocks have been truncated off the file. Prior to commit 43518812d2 ("xfs: remove support for inlining data/extents into the inode fork") nobody noticed because the leftover inline data after truncation was small enough to fit inside the inline buffer inside the fork itself. However, now that we've removed the inline buffer, we /always/ have to free the inline data buffer or else we leak them like crazy. This test was found by turning on kmemleak for generic/001 or generic/388. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 61d1cb7dc10d2..8012741266488 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2400,6 +2400,24 @@ xfs_ifree_cluster( return 0; } +/* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + /* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have @@ -2437,6 +2455,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; From 509955823cc9cc225c05673b1b83d70ca70c5c60 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Nov 2017 20:53:02 -0800 Subject: [PATCH 174/406] xfs: log recovery should replay deferred ops in order As part of testing log recovery with dm_log_writes, Amir Goldstein discovered an error in the deferred ops recovery that lead to corruption of the filesystem metadata if a reflink+rmap filesystem happened to shut down midway through a CoW remap: "This is what happens [after failed log recovery]: "Phase 1 - find and verify superblock... "Phase 2 - using internal log " - zero log... " - scan filesystem freespace and inode maps... " - found root inode chunk "Phase 3 - for each AG... " - scan (but don't clear) agi unlinked lists... " - process known inodes and perform inode discovery... " - agno = 0 "data fork in regular inode 134 claims CoW block 376 "correcting nextents for inode 134 "bad data fork in inode 134 "would have cleared inode 134" Hou Tao dissected the log contents of exactly such a crash: "According to the implementation of xfs_defer_finish(), these ops should be completed in the following sequence: "Have been done: "(1) CUI: Oper (160) "(2) BUI: Oper (161) "(3) CUD: Oper (194), for CUI Oper (160) "(4) RUI A: Oper (197), free rmap [0x155, 2, -9] "Should be done: "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI A "(8) RUD: for RUI B "Actually be done by xlog_recover_process_intents() "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI B "(8) RUD: for RUI A "So the rmap entry [0x155, 2, -9] for COW should be freed firstly, then a new rmap entry [0x155, 2, 137] will be added. However, as we can see from the log record in post_mount.log (generated after umount) and the trace print, the new rmap entry [0x155, 2, 137] are added firstly, then the rmap entry [0x155, 2, -9] are freed." When reconstructing the internal log state from the log items found on disk, it's required that deferred ops replay in exactly the same order that they would have had the filesystem not gone down. However, replaying unfinished deferred ops can create /more/ deferred ops. These new deferred ops are finished in the wrong order. This causes fs corruption and replay crashes, so let's create a single defer_ops to handle the subsequent ops created during replay, then use one single transaction at the end of log recovery to ensure that everything is replayed in the same order as they're supposed to be. Reported-by: Amir Goldstein Analyzed-by: Hou Tao Reviewed-by: Christoph Hellwig Tested-by: Amir Goldstein Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_item.c | 23 ++++-------- fs/xfs/xfs_bmap_item.h | 3 +- fs/xfs/xfs_log_recover.c | 75 ++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_refcount_item.c | 21 ++++------- fs/xfs/xfs_refcount_item.h | 3 +- 5 files changed, 85 insertions(+), 40 deletions(-) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index dd136f7275e4a..e5fb008d75e89 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -389,7 +389,8 @@ xfs_bud_init( int xfs_bui_recover( struct xfs_mount *mp, - struct xfs_bui_log_item *buip) + struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops) { int error = 0; unsigned int bui_type; @@ -404,9 +405,7 @@ xfs_bui_recover( xfs_exntst_t state; struct xfs_trans *tp; struct xfs_inode *ip = NULL; - struct xfs_defer_ops dfops; struct xfs_bmbt_irec irec; - xfs_fsblock_t firstfsb; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -464,7 +463,6 @@ xfs_bui_recover( if (VFS_I(ip)->i_nlink == 0) xfs_iflags_set(ip, XFS_IRECOVERY); - xfs_defer_init(&dfops, &firstfsb); /* Process deferred bmap item. */ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? @@ -479,16 +477,16 @@ xfs_bui_recover( break; default: error = -EFSCORRUPTED; - goto err_dfops; + goto err_inode; } xfs_trans_ijoin(tp, ip, 0); count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, + error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type, ip, whichfork, bmap->me_startoff, bmap->me_startblock, &count, state); if (error) - goto err_dfops; + goto err_inode; if (count > 0) { ASSERT(type == XFS_BMAP_UNMAP); @@ -496,16 +494,11 @@ xfs_bui_recover( irec.br_blockcount = count; irec.br_startoff = bmap->me_startoff; irec.br_state = state; - error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); + error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec); if (error) - goto err_dfops; + goto err_inode; } - /* Finish transaction, free inodes. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto err_dfops; - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -513,8 +506,6 @@ xfs_bui_recover( return error; -err_dfops: - xfs_defer_cancel(&dfops); err_inode: xfs_trans_cancel(tp); if (ip) { diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index c867daae4a3ce..24b354a2c8364 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); -int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip); +int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops); #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87b1c331f9ebf..28d1abfe835ee 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -24,6 +24,7 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" @@ -4716,7 +4717,8 @@ STATIC int xlog_recover_process_cui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_cui_log_item *cuip; int error; @@ -4729,7 +4731,7 @@ xlog_recover_process_cui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_cui_recover(mp, cuip); + error = xfs_cui_recover(mp, cuip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4756,7 +4758,8 @@ STATIC int xlog_recover_process_bui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_bui_log_item *buip; int error; @@ -4769,7 +4772,7 @@ xlog_recover_process_bui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_bui_recover(mp, buip); + error = xfs_bui_recover(mp, buip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip) } } +/* Take all the collected deferred ops and finish them in order. */ +static int +xlog_finish_defer_ops( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops) +{ + struct xfs_trans *tp; + int64_t freeblks; + uint resblks; + int error; + + /* + * We're finishing the defer_ops that accumulated as a result of + * recovering unfinished intent items during log recovery. We + * reserve an itruncate transaction because it is the largest + * permanent transaction type. Since we're the only user of the fs + * right now, take 93% (15/16) of the available free blocks. Use + * weird math to avoid a 64-bit division. + */ + freeblks = percpu_counter_sum(&mp->m_fdblocks); + if (freeblks <= 0) + return -ENOSPC; + resblks = min_t(int64_t, UINT_MAX, freeblks); + resblks = (resblks * 15) >> 4; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, + 0, XFS_TRANS_RESERVE, &tp); + if (error) + return error; + + error = xfs_defer_finish(&tp, dfops); + if (error) + goto out_cancel; + + return xfs_trans_commit(tp); + +out_cancel: + xfs_trans_cancel(tp); + return error; +} + /* * When this is called, all of the log intent items which did not have * corresponding log done items should be in the AIL. What we do now @@ -4825,10 +4868,12 @@ STATIC int xlog_recover_process_intents( struct xlog *log) { - struct xfs_log_item *lip; - int error = 0; + struct xfs_defer_ops dfops; struct xfs_ail_cursor cur; + struct xfs_log_item *lip; struct xfs_ail *ailp; + xfs_fsblock_t firstfsb; + int error = 0; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; #endif @@ -4839,6 +4884,7 @@ xlog_recover_process_intents( #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif + xfs_defer_init(&dfops, &firstfsb); while (lip != NULL) { /* * We're done when we see something other than an intent. @@ -4859,6 +4905,12 @@ xlog_recover_process_intents( */ ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); + /* + * NOTE: If your intent processing routine can create more + * deferred ops, you /must/ attach them to the dfops in this + * routine or else those subsequent intents will get + * replayed in the wrong order! + */ switch (lip->li_type) { case XFS_LI_EFI: error = xlog_recover_process_efi(log->l_mp, ailp, lip); @@ -4867,10 +4919,12 @@ xlog_recover_process_intents( error = xlog_recover_process_rui(log->l_mp, ailp, lip); break; case XFS_LI_CUI: - error = xlog_recover_process_cui(log->l_mp, ailp, lip); + error = xlog_recover_process_cui(log->l_mp, ailp, lip, + &dfops); break; case XFS_LI_BUI: - error = xlog_recover_process_bui(log->l_mp, ailp, lip); + error = xlog_recover_process_bui(log->l_mp, ailp, lip, + &dfops); break; } if (error) @@ -4880,6 +4934,11 @@ xlog_recover_process_intents( out: xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); + if (error) + xfs_defer_cancel(&dfops); + else + error = xlog_finish_defer_ops(log->l_mp, &dfops); + return error; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 8f2e2fac4255d..3a55d6fc271b1 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -393,7 +393,8 @@ xfs_cud_init( int xfs_cui_recover( struct xfs_mount *mp, - struct xfs_cui_log_item *cuip) + struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops) { int i; int error = 0; @@ -405,11 +406,9 @@ xfs_cui_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; enum xfs_refcount_intent_type type; - xfs_fsblock_t firstfsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; struct xfs_bmbt_irec irec; - struct xfs_defer_ops dfops; bool requeue_only = false; ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); @@ -465,7 +464,6 @@ xfs_cui_recover( return error; cudp = xfs_trans_get_cud(tp, cuip); - xfs_defer_init(&dfops, &firstfsb); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { refc = &cuip->cui_format.cui_extents[i]; refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; @@ -485,7 +483,7 @@ xfs_cui_recover( new_len = refc->pe_len; } else error = xfs_trans_log_finish_refcount_update(tp, cudp, - &dfops, type, refc->pe_startblock, refc->pe_len, + dfops, type, refc->pe_startblock, refc->pe_len, &new_fsb, &new_len, &rcur); if (error) goto abort_error; @@ -497,21 +495,21 @@ xfs_cui_recover( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_increase_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_decrease_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_ALLOC_COW: error = xfs_refcount_alloc_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; case XFS_REFCOUNT_FREE_COW: error = xfs_refcount_free_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; @@ -525,17 +523,12 @@ xfs_cui_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); -abort_defer: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 5b74dddfa64be..0e5327349a13e 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); -int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip); +int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops); #endif /* __XFS_REFCOUNT_ITEM_H__ */ From 6e0c9507bf51e1517a80ad0ac171e5402528fcef Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Nov 2017 12:28:17 +0100 Subject: [PATCH 175/406] i2c: i801: Fix Failed to allocate irq -2147483648 error On Apollo Lake devices the BIOS does not set up IRQ routing for the i801 SMBUS controller IRQ, so we end up with dev->irq set to IRQ_NOTCONNECTED. Detect this and do not try to use the irq in this case silencing: i801_smbus 0000:00:1f.1: Failed to allocate irq -2147483648: -107 Cc: stable@vger.kernel.org BugLink: https://communities.intel.com/thread/114759 Signed-off-by: Hans de Goede Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9e12a53ef7b8c..8eac00efadc1a 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1617,6 +1617,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Default timeout in interrupt mode: 200 ms */ priv->adapter.timeout = HZ / 5; + if (dev->irq == IRQ_NOTCONNECTED) + priv->features &= ~FEATURE_IRQ; + if (priv->features & FEATURE_IRQ) { u16 pcictl, pcists; From 66a7c84d677e8e4a5a2ef4afdb9bd52e1399a866 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Nov 2017 17:52:24 +0000 Subject: [PATCH 176/406] i2c: i2c-boardinfo: fix memory leaks on devinfo Currently when an error occurs devinfo is still allocated but is unused when the error exit paths break out of the for-loop. Fix this by kfree'ing devinfo to avoid the leak. Detected by CoverityScan, CID#1416590 ("Resource Leak") Fixes: 4124c4eba402 ("i2c: allow attaching IRQ resources to i2c_board_info") Fixes: 0daaf99d8424 ("i2c: copy device properties when using i2c_register_board_info()") Signed-off-by: Colin Ian King Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-boardinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 31186ead5a407..509a6007cdf65 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -86,6 +86,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig property_entries_dup(info->properties); if (IS_ERR(devinfo->board_info.properties)) { status = PTR_ERR(devinfo->board_info.properties); + kfree(devinfo); break; } } @@ -98,6 +99,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig GFP_KERNEL); if (!devinfo->board_info.resources) { status = -ENOMEM; + kfree(devinfo); break; } } From 2967acbb257a6a9bf912f4778b727e00972eac9b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 19 Nov 2017 11:52:55 -0700 Subject: [PATCH 177/406] blktrace: fix trace mutex deadlock A previous commit changed the locking around registration/cleanup, but direct callers of blk_trace_remove() were missed. This means that if we hit the error path in setup, we will deadlock on attempting to re-acquire the queue trace mutex. Fixes: 1f2cac107c59 ("blktrace: fix unlocked access to init/start-stop/teardown") Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c5987d4c5f23b..987d9a9ae2839 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -591,7 +591,7 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, return ret; if (copy_to_user(arg, &buts, sizeof(buts))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } return 0; @@ -637,7 +637,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, return ret; if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } From 5b5971df3bc2775107ddad164018a8a8db633b81 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Thu, 23 Nov 2017 15:18:35 +0100 Subject: [PATCH 178/406] xen-netfront: remove warning when unloading module v2: * Replace busy wait with wait_event()/wake_up_all() * Cannot garantee that at the time xennet_remove is called, the xen_netback state will not be XenbusStateClosed, so added a condition for that * There's a small chance for the xen_netback state is XenbusStateUnknown by the time the xen_netfront switches to Closed, so added a condition for that. When unloading module xen_netfront from guest, dmesg would output warning messages like below: [ 105.236836] xen:grant_table: WARNING: g.e. 0x903 still in use! [ 105.236839] deferring g.e. 0x903 (pfn 0x35805) This problem relies on netfront and netback being out of sync. By the time netfront revokes the g.e.'s netback didn't have enough time to free all of them, hence displaying the warnings on dmesg. The trick here is to make netfront to wait until netback frees all the g.e.'s and only then continue to cleanup for the module removal, and this is done by manipulating both device states. Signed-off-by: Eduardo Otubo Acked-by: Juergen Gross Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8b8689c6d8877..391432e2725db 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -87,6 +87,8 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); + struct netfront_stats { u64 packets; u64 bytes; @@ -2021,10 +2023,12 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: + wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: + wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2130,6 +2134,20 @@ static int xennet_remove(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { + xenbus_switch_state(dev, XenbusStateClosing); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosing); + + xenbus_switch_state(dev, XenbusStateClosed); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); + } + xennet_disconnect_backend(info); unregister_netdev(info->netdev); From 2e724dca7749223204bbae21745c0e3fc932700a Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 27 Nov 2017 20:13:39 +0100 Subject: [PATCH 179/406] tipc: eliminate access after delete in group_filter_msg() KASAN revealed another access after delete in group.c. This time it found that we read the header of a received message after the buffer has been released. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 12777cac638a6..95fec2c057d6e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -497,6 +497,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, while ((skb = skb_peek(defq))) { hdr = buf_msg(skb); mtyp = msg_type(hdr); + blks = msg_blocks(hdr); deliver = true; ack = false; update = false; @@ -546,7 +547,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (!update) continue; - blks = msg_blocks(hdr); tipc_group_update_rcv_win(grp, blks, node, port, xmitq); } return; From a8f97366452ed491d13cf1e44241bc0b5740b1f0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 27 Nov 2017 06:21:25 +0300 Subject: [PATCH 180/406] mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d() Currently, we unconditionally make page table dirty in touch_pmd(). It may result in false-positive can_follow_write_pmd(). We may avoid the situation, if we would only make the page table entry dirty if caller asks for write access -- FOLL_WRITE. The patch also changes touch_pud() in the same way. Signed-off-by: Kirill A. Shutemov Cc: Michal Hocko Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86fe697e8bfb3..0e7ded98d114d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -842,20 +842,15 @@ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd) + pmd_t *pmd, int flags) { pmd_t _pmd; - /* - * We should set the dirty bit only for FOLL_WRITE but for now - * the dirty bit in the pmd is meaningless. And if the dirty - * bit will become meaningful and we'll only set it with - * FOLL_WRITE, an atomic set_bit will be required on the pmd to - * set the young bit, instead of the current set_pmd_at. - */ - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); + _pmd = pmd_mkyoung(*pmd); + if (flags & FOLL_WRITE) + _pmd = pmd_mkdirty(_pmd); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, - pmd, _pmd, 1)) + pmd, _pmd, flags & FOLL_WRITE)) update_mmu_cache_pmd(vma, addr, pmd); } @@ -884,7 +879,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, return NULL; if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); /* * device mapped pages can only be returned if the @@ -995,20 +990,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static void touch_pud(struct vm_area_struct *vma, unsigned long addr, - pud_t *pud) + pud_t *pud, int flags) { pud_t _pud; - /* - * We should set the dirty bit only for FOLL_WRITE but for now - * the dirty bit in the pud is meaningless. And if the dirty - * bit will become meaningful and we'll only set it with - * FOLL_WRITE, an atomic set_bit will be required on the pud to - * set the young bit, instead of the current set_pud_at. - */ - _pud = pud_mkyoung(pud_mkdirty(*pud)); + _pud = pud_mkyoung(*pud); + if (flags & FOLL_WRITE) + _pud = pud_mkdirty(_pud); if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, - pud, _pud, 1)) + pud, _pud, flags & FOLL_WRITE)) update_mmu_cache_pud(vma, addr, pud); } @@ -1031,7 +1021,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, return NULL; if (flags & FOLL_TOUCH) - touch_pud(vma, addr, pud); + touch_pud(vma, addr, pud, flags); /* * device mapped pages can only be returned if the @@ -1424,7 +1414,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, page = pmd_page(*pmd); VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * We don't mlock() pte-mapped THPs. This way we can avoid From 152e93af3cfe2d29d8136cc0a02a8612507136ee Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 27 Nov 2017 06:21:26 +0300 Subject: [PATCH 181/406] mm, thp: Do not make pmd/pud dirty without a reason Currently we make page table entries dirty all the time regardless of access type and don't even consider if the mapping is write-protected. The reasoning is that we don't really need dirty tracking on THP and making the entry dirty upfront may save some time on first write to the page. Unfortunately, such approach may result in false-positive can_follow_write_pmd() for huge zero page or read-only shmem file. Let's only make page dirty only if we about to write to the page anyway (as we do for small pages). I've restructured the code to make entry dirty inside maybe_p[mu]d_mkwrite(). It also takes into account if the vma is write-protected. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 31 +++++++++++++++++++------------ mm/internal.h | 3 ++- mm/khugepaged.c | 2 +- mm/memory.c | 2 +- mm/migrate.c | 2 +- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0e7ded98d114d..f22401fd83b5c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -474,10 +474,13 @@ static int __init setup_transparent_hugepage(char *str) } __setup("transparent_hugepage=", setup_transparent_hugepage); -pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) +pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty) { - if (likely(vma->vm_flags & VM_WRITE)) + if (likely(vma->vm_flags & VM_WRITE)) { pmd = pmd_mkwrite(pmd); + if (dirty) + pmd = pmd_mkdirty(pmd); + } return pmd; } @@ -599,7 +602,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } entry = mk_huge_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); page_add_new_anon_rmap(page, vma, haddr, true); mem_cgroup_commit_charge(page, memcg, false, true); lru_cache_add_active_or_unevictable(page, vma); @@ -741,8 +744,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); if (write) { - entry = pmd_mkyoung(pmd_mkdirty(entry)); - entry = maybe_pmd_mkwrite(entry, vma); + entry = pmd_mkyoung(entry); + entry = maybe_pmd_mkwrite(entry, vma, true); } if (pgtable) { @@ -788,10 +791,14 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) +static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma, + bool dirty) { - if (likely(vma->vm_flags & VM_WRITE)) + if (likely(vma->vm_flags & VM_WRITE)) { pud = pud_mkwrite(pud); + if (dirty) + pud = pud_mkdirty(pud); + } return pud; } @@ -807,8 +814,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); if (write) { - entry = pud_mkyoung(pud_mkdirty(entry)); - entry = maybe_pud_mkwrite(entry, vma); + entry = pud_mkyoung(entry); + entry = maybe_pud_mkwrite(entry, vma, true); } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); @@ -1279,7 +1286,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) if (reuse_swap_page(page, NULL)) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); ret |= VM_FAULT_WRITE; @@ -1349,7 +1356,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) } else { pmd_t entry; entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd); page_add_new_anon_rmap(new_page, vma, haddr, true); mem_cgroup_commit_charge(new_page, memcg, false, true); @@ -2928,7 +2935,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = pmd_mksoft_dirty(pmde); if (is_write_migration_entry(entry)) - pmde = maybe_pmd_mkwrite(pmde, vma); + pmde = maybe_pmd_mkwrite(pmde, vma, false); flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); page_add_anon_rmap(new, vma, mmun_start, true); diff --git a/mm/internal.h b/mm/internal.h index e6bd35182daee..b35cdebda0cef 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -328,7 +328,8 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) } } -extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); +extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, + bool dirty); /* * At what user virtual address is page expected in @vma? diff --git a/mm/khugepaged.c b/mm/khugepaged.c index ea4ff259b6719..db43dc8a8ae61 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1057,7 +1057,7 @@ static void collapse_huge_page(struct mm_struct *mm, pgtable = pmd_pgtable(_pmd); _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); - _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); + _pmd = maybe_pmd_mkwrite(_pmd, vma, false); /* * spin_lock() below is not the equivalent of smp_wmb(), so diff --git a/mm/memory.c b/mm/memory.c index 85e7a87da79fe..b10c1d26f675b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3335,7 +3335,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) entry = mk_huge_pmd(page, vma->vm_page_prot); if (write) - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); page_add_file_rmap(page, true); diff --git a/mm/migrate.c b/mm/migrate.c index 4d0be47a322a8..57865fc8cfe33 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2068,7 +2068,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, } entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, false); /* * Clear the old entry under pagetable lock and establish the new PTE. From 141cbfba1d0502006463aa80f57c64086226af1a Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 10 Aug 2017 10:53:53 +0200 Subject: [PATCH 182/406] auxdisplay: img-ascii-lcd: Only build on archs that have IOMEM This avoids the MODPOST error: ERROR: "devm_ioremap_resource" [drivers/auxdisplay/img-ascii-lcd.ko] undefined! Signed-off-by: Thomas Meyer Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/auxdisplay/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index d7d21118d3e0f..2c2ed9cf87962 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -136,6 +136,7 @@ config CFAG12864B_RATE config IMG_ASCII_LCD tristate "Imagination Technologies ASCII LCD Display" + depends on HAS_IOMEM default y if MIPS_MALTA || MIPS_SEAD3 select SYSCON help From 1751e8a6cb935e555fcdbcb9ab4f0446e322ca3e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 27 Nov 2017 13:05:09 -0800 Subject: [PATCH 183/406] Rename superblock flags (MS_xyz -> SB_xyz) This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/mtd/mtdsuper.c | 6 +-- drivers/staging/lustre/lustre/llite/file.c | 2 +- .../staging/lustre/lustre/llite/llite_lib.c | 14 ++--- fs/9p/vfs_super.c | 6 +-- fs/adfs/super.c | 4 +- fs/affs/amigaffs.c | 2 +- fs/affs/bitmap.c | 6 +-- fs/affs/super.c | 16 +++--- fs/afs/super.c | 4 +- fs/befs/ChangeLog | 2 +- fs/befs/linuxvfs.c | 4 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent_io.c | 2 +- fs/btrfs/ioctl.c | 4 +- fs/btrfs/super.c | 50 +++++++++--------- fs/btrfs/volumes.c | 4 +- fs/ceph/super.c | 8 +-- fs/cifs/cifs_fs_sb.h | 2 +- fs/cifs/cifsfs.c | 12 ++--- fs/cifs/cifsglob.h | 4 +- fs/cifs/inode.c | 2 +- fs/cifs/xattr.c | 8 +-- fs/coda/inode.c | 4 +- fs/cramfs/inode.c | 4 +- fs/ecryptfs/main.c | 8 +-- fs/efs/super.c | 4 +- fs/ext2/balloc.c | 4 +- fs/ext2/ialloc.c | 4 +- fs/ext2/super.c | 20 +++---- fs/ext4/inode.c | 4 +- fs/ext4/super.c | 52 +++++++++---------- fs/f2fs/checkpoint.c | 10 ++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/recovery.c | 10 ++-- fs/f2fs/super.c | 28 +++++----- fs/fat/fatent.c | 6 +-- fs/fat/inode.c | 8 +-- fs/fat/misc.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/freevxfs/vxfs_super.c | 4 +- fs/fs-writeback.c | 2 +- fs/fuse/inode.c | 12 ++--- fs/gfs2/ops_fstype.c | 16 +++--- fs/gfs2/super.c | 10 ++-- fs/gfs2/trans.c | 2 +- fs/hfs/mdb.c | 4 +- fs/hfs/super.c | 16 +++--- fs/hfsplus/super.c | 22 ++++---- fs/hpfs/map.c | 2 +- fs/hpfs/super.c | 8 +-- fs/inode.c | 10 ++-- fs/isofs/inode.c | 2 +- fs/jffs2/fs.c | 4 +- fs/jffs2/os-linux.h | 2 +- fs/jffs2/super.c | 4 +- fs/jfs/super.c | 10 ++-- fs/kernfs/mount.c | 2 +- fs/libfs.c | 6 +-- fs/locks.c | 2 +- fs/minix/inode.c | 4 +- fs/ncpfs/inode.c | 4 +- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 2 +- fs/nfs/super.c | 22 ++++---- fs/nilfs2/segment.c | 2 +- fs/nilfs2/super.c | 24 ++++----- fs/nilfs2/the_nilfs.c | 6 +-- fs/notify/fsnotify.c | 2 +- fs/nsfs.c | 2 +- fs/ntfs/super.c | 32 ++++++------ fs/ocfs2/file.c | 2 +- fs/ocfs2/super.c | 28 +++++----- fs/ocfs2/xattr.c | 2 +- fs/openpromfs/inode.c | 4 +- fs/orangefs/super.c | 8 +-- fs/overlayfs/super.c | 10 ++-- fs/proc/inode.c | 2 +- fs/proc/root.c | 2 +- fs/proc_namespace.c | 8 +-- fs/qnx4/inode.c | 4 +- fs/qnx6/inode.c | 4 +- fs/reiserfs/inode.c | 2 +- fs/reiserfs/journal.c | 6 +-- fs/reiserfs/prints.c | 4 +- fs/reiserfs/super.c | 18 +++---- fs/reiserfs/xattr.c | 10 ++-- fs/romfs/super.c | 4 +- fs/squashfs/super.c | 4 +- fs/statfs.c | 6 +-- fs/sysfs/mount.c | 2 +- fs/sysv/inode.c | 2 +- fs/sysv/super.c | 2 +- fs/ubifs/file.c | 2 +- fs/ubifs/io.c | 2 +- fs/ubifs/super.c | 20 +++---- fs/ubifs/ubifs.h | 4 +- fs/udf/super.c | 6 +-- fs/ufs/balloc.c | 8 +-- fs/ufs/ialloc.c | 10 ++-- fs/ufs/super.c | 30 +++++------ fs/xfs/xfs_log.c | 6 +-- fs/xfs/xfs_super.c | 8 +-- fs/xfs/xfs_super.h | 2 +- include/linux/fs.h | 2 +- include/uapi/linux/bfs_fs.h | 2 +- ipc/mqueue.c | 2 +- mm/shmem.c | 10 ++-- security/apparmor/apparmorfs.c | 2 +- security/apparmor/include/lib.h | 2 +- 111 files changed, 417 insertions(+), 417 deletions(-) diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index e43fea896d1ed..d58a61c093047 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -79,14 +79,14 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, pr_debug("MTDSB: New superblock for device %d (\"%s\")\n", mtd->index, mtd->name); - ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + ret = fill_super(sb, data, flags & SB_SILENT ? 1 : 0); if (ret < 0) { deactivate_locked_super(sb); return ERR_PTR(ret); } /* go */ - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; return dget(sb->s_root); /* new mountpoint for an already mounted superblock */ @@ -202,7 +202,7 @@ struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, not_an_MTD_device: #endif /* CONFIG_BLOCK */ - if (!(flags & MS_SILENT)) + if (!(flags & SB_SILENT)) printk(KERN_NOTICE "MTD: Attempt to mount non-MTD device \"%s\"\n", dev_name); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 2d6e64dea2660..938b859b6650b 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -1016,7 +1016,7 @@ static bool file_is_noatime(const struct file *file) if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) return true; - if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) return true; return false; diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 65ac5128f0057..8666f1e81ade7 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -313,11 +313,11 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, } if (data->ocd_connect_flags & OBD_CONNECT_ACL) { - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; sbi->ll_flags |= LL_SBI_ACL; } else { LCONSOLE_INFO("client wants to enable acl, but mdt not!\n"); - sb->s_flags &= ~MS_POSIXACL; + sb->s_flags &= ~SB_POSIXACL; sbi->ll_flags &= ~LL_SBI_ACL; } @@ -660,7 +660,7 @@ void ll_kill_super(struct super_block *sb) struct ll_sb_info *sbi; /* not init sb ?*/ - if (!(sb->s_flags & MS_ACTIVE)) + if (!(sb->s_flags & SB_ACTIVE)) return; sbi = ll_s2sbi(sb); @@ -2039,8 +2039,8 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) int err; __u32 read_only; - if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { - read_only = *flags & MS_RDONLY; + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { + read_only = *flags & SB_RDONLY; err = obd_set_info_async(NULL, sbi->ll_md_exp, sizeof(KEY_READ_ONLY), KEY_READ_ONLY, sizeof(read_only), @@ -2053,9 +2053,9 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) } if (read_only) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; else - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; if (sbi->ll_flags & LL_SBI_VERBOSE) LCONSOLE_WARN("Remounted %s %s\n", profilenm, diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8b75463cb2116..af03c2a901eb4 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -94,13 +94,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; - sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; + sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME; if (!v9ses->cache) - sb->s_flags |= MS_SYNCHRONOUS; + sb->s_flags |= SB_SYNCHRONOUS; #ifdef CONFIG_9P_FS_POSIX_ACL if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif return 0; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index c9fdfb1129335..cfda2c7caedce 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -213,7 +213,7 @@ static int parse_options(struct super_block *sb, char *options) static int adfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; return parse_options(sb, data); } @@ -372,7 +372,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) struct inode *root; int ret = -EINVAL; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; asb = kzalloc(sizeof(*asb), GFP_KERNEL); if (!asb) diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 185d5ab7e986a..0f0e6925e97dd 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -453,7 +453,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...) pr_crit("error (device %s): %s(): %pV\n", sb->s_id, function, &vaf); if (!sb_rdonly(sb)) pr_warn("Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; va_end(args); } diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 2b1399611d9e6..5ba9ef2742f6e 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -250,12 +250,12 @@ int affs_init_bitmap(struct super_block *sb, int *flags) int i, res = 0; struct affs_sb_info *sbi = AFFS_SB(sb); - if (*flags & MS_RDONLY) + if (*flags & SB_RDONLY) return 0; if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) { pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -288,7 +288,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) if (affs_checksum_block(sb, bh)) { pr_warn("Bitmap %u invalid - mounting %s read only.\n", bm->bm_key, sb->s_id); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; goto out; } pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key); diff --git a/fs/affs/super.c b/fs/affs/super.c index 884bedab7266a..1117e36134cc8 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -356,7 +356,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = AFFS_SUPER_MAGIC; sb->s_op = &affs_sops; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL); if (!sbi) @@ -466,7 +466,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS || chksum == MUFS_DCOFS) && !sb_rdonly(sb)) { pr_notice("Dircache FS - mounting %s read only\n", sb->s_id); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } switch (chksum) { case MUFS_FS: @@ -488,7 +488,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) /* fall thru */ case FS_OFS: affs_set_opt(sbi->s_flags, SF_OFS); - sb->s_flags |= MS_NOEXEC; + sb->s_flags |= SB_NOEXEC; break; case MUFS_DCOFS: case MUFS_INTLOFS: @@ -497,7 +497,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) case FS_INTLOFS: affs_set_opt(sbi->s_flags, SF_INTL); affs_set_opt(sbi->s_flags, SF_OFS); - sb->s_flags |= MS_NOEXEC; + sb->s_flags |= SB_NOEXEC; break; default: pr_err("Unknown filesystem on device %s: %08X\n", @@ -513,7 +513,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) sig, sig[3] + '0', blocksize); } - sb->s_flags |= MS_NODEV | MS_NOSUID; + sb->s_flags |= SB_NODEV | SB_NOSUID; sbi->s_data_blksize = sb->s_blocksize; if (affs_test_opt(sbi->s_flags, SF_OFS)) @@ -570,7 +570,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; memcpy(volume, sbi->s_volume, 32); if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, @@ -596,10 +596,10 @@ affs_remount(struct super_block *sb, int *flags, char *data) memcpy(sbi->s_volume, volume, 32); spin_unlock(&sbi->symlink_lock); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (*flags & MS_RDONLY) + if (*flags & SB_RDONLY) affs_free_bitmap(sb); else res = affs_init_bitmap(sb, flags); diff --git a/fs/afs/super.c b/fs/afs/super.c index 875b5eb02242a..d3f97da61bdfc 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -496,10 +496,10 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, if (ret < 0) goto error_sb; as = NULL; - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; } else { _debug("reuse"); - ASSERTCMP(sb->s_flags, &, MS_ACTIVE); + ASSERTCMP(sb->s_flags, &, SB_ACTIVE); afs_destroy_sbi(as); as = NULL; } diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog index 75a461cfaca62..16f2dfe8c2f74 100644 --- a/fs/befs/ChangeLog +++ b/fs/befs/ChangeLog @@ -365,7 +365,7 @@ Version 0.4 (2001-10-28) (fs/befs/super.c) * Tell the kernel to only mount befs read-only. - By setting the MS_RDONLY flag in befs_read_super(). + By setting the SB_RDONLY flag in befs_read_super(). Not that it was possible to write before. But now the kernel won't even try. (fs/befs/super.c) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index a92355cc453bf..ee236231cafac 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -841,7 +841,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { befs_warning(sb, "No write support. Marking filesystem read-only"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* @@ -948,7 +948,7 @@ static int befs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if (!(*flags & MS_RDONLY)) + if (!(*flags & SB_RDONLY)) return -EINVAL; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f7df5536ab61e..51477a537c837 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2957,7 +2957,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) */ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) { - return fs_info->sb->s_flags & MS_RDONLY || btrfs_fs_closing(fs_info); + return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info); } static inline void free_fs_info(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 16045ea86fc13..f9e9f721efe22 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1984,7 +1984,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, struct btrfs_bio *bbio = NULL; int ret; - ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); + ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); BUG_ON(!mirror_num); bio = btrfs_io_bio_alloc(1); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd172a93d11a9..d748ad1c3620a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1172,7 +1172,7 @@ static int cluster_pages_for_defrag(struct inode *inode, if (!i_done || ret) goto out; - if (!(inode->i_sb->s_flags & MS_ACTIVE)) + if (!(inode->i_sb->s_flags & SB_ACTIVE)) goto out; /* @@ -1333,7 +1333,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * make sure we stop running if someone unmounts * the FS */ - if (!(inode->i_sb->s_flags & MS_ACTIVE)) + if (!(inode->i_sb->s_flags & SB_ACTIVE)) break; if (btrfs_defrag_cancelled(fs_info)) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 65af029559b58..305cae7444a07 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -107,7 +107,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) return; if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; btrfs_info(fs_info, "forced readonly"); /* * Note that a running device replace operation is not @@ -137,7 +137,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function /* * Special case: if the error is EROFS, and we're already - * under MS_RDONLY, then it is safe here. + * under SB_RDONLY, then it is safe here. */ if (errno == -EROFS && sb_rdonly(sb)) return; @@ -168,7 +168,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); /* Don't go through full error handling during mount */ - if (sb->s_flags & MS_BORN) + if (sb->s_flags & SB_BORN) btrfs_handle_error(fs_info); } @@ -625,7 +625,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_acl: #ifdef CONFIG_BTRFS_FS_POSIX_ACL - info->sb->s_flags |= MS_POSIXACL; + info->sb->s_flags |= SB_POSIXACL; break; #else btrfs_err(info, "support for ACL not compiled in!"); @@ -633,7 +633,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, goto out; #endif case Opt_noacl: - info->sb->s_flags &= ~MS_POSIXACL; + info->sb->s_flags &= ~SB_POSIXACL; break; case Opt_notreelog: btrfs_set_and_info(info, NOTREELOG, @@ -851,7 +851,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, /* * Extra check for current option against current flag */ - if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { + if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) { btrfs_err(info, "nologreplay must be used with ro mount option"); ret = -EINVAL; @@ -1147,7 +1147,7 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; #ifdef CONFIG_BTRFS_FS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif sb->s_flags |= SB_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; @@ -1180,7 +1180,7 @@ static int btrfs_fill_super(struct super_block *sb, } cleancache_init_fs(sb); - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; return 0; fail_close: @@ -1277,7 +1277,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",flushoncommit"); if (btrfs_test_opt(info, DISCARD)) seq_puts(seq, ",discard"); - if (!(info->sb->s_flags & MS_POSIXACL)) + if (!(info->sb->s_flags & SB_POSIXACL)) seq_puts(seq, ",noacl"); if (btrfs_test_opt(info, SPACE_CACHE)) seq_puts(seq, ",space_cache"); @@ -1409,11 +1409,11 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { - if (flags & MS_RDONLY) { - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, + if (flags & SB_RDONLY) { + mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY, device_name, newargs); } else { - mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, + mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY, device_name, newargs); if (IS_ERR(mnt)) { root = ERR_CAST(mnt); @@ -1565,7 +1565,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, u64 subvol_objectid = 0; int error = 0; - if (!(flags & MS_RDONLY)) + if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; error = btrfs_parse_early_options(data, mode, fs_type, @@ -1619,13 +1619,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (error) goto error_fs_info; - if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { + if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) { error = -EACCES; goto error_close_devices; } bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC, + s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC, fs_info); if (IS_ERR(s)) { error = PTR_ERR(s); @@ -1635,7 +1635,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (s->s_root) { btrfs_close_devices(fs_devices); free_fs_info(fs_info); - if ((flags ^ s->s_flags) & MS_RDONLY) + if ((flags ^ s->s_flags) & SB_RDONLY) error = -EBUSY; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); @@ -1702,11 +1702,11 @@ static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info, { if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || - (flags & MS_RDONLY))) { + (flags & SB_RDONLY))) { /* wait for any defraggers to finish */ wait_event(fs_info->transaction_wait, (atomic_read(&fs_info->defrag_running) == 0)); - if (flags & MS_RDONLY) + if (flags & SB_RDONLY) sync_filesystem(fs_info->sb); } } @@ -1766,10 +1766,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { /* * this also happens on 'umount -rf' or on shutdown, when * the filesystem is busy. @@ -1781,10 +1781,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) /* avoid complains from lockdep et al. */ up(&fs_info->uuid_tree_rescan_sem); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* - * Setting MS_RDONLY will put the cleaner thread to + * Setting SB_RDONLY will put the cleaner thread to * sleep at the next loop if it's already active. * If it's already asleep, we'll leave unused block * groups on disk until we're mounted read-write again @@ -1856,7 +1856,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) goto restore; } } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; set_bit(BTRFS_FS_OPEN, &fs_info->flags); } @@ -1866,9 +1866,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) return 0; restore: - /* We've hit an error - don't reset MS_RDONLY */ + /* We've hit an error - don't reset SB_RDONLY */ if (sb_rdonly(sb)) - old_flags |= MS_RDONLY; + old_flags |= SB_RDONLY; sb->s_flags = old_flags; fs_info->mount_opt = old_opts; fs_info->compress_type = old_compress_type; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f1ecb938ba4d7..925070b9ce030 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2384,7 +2384,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); if (seeding_dev) { - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; ret = btrfs_prepare_sprout(fs_info); if (ret) { btrfs_abort_transaction(trans, ret); @@ -2497,7 +2497,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); error_trans: if (seeding_dev) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; if (trans) btrfs_end_transaction(trans); rcu_string_free(device->name); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fe9fbb3f13f7c..a62d2a9841dc2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -331,11 +331,11 @@ static int parse_fsopt_token(char *c, void *private) break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: - fsopt->sb_flags |= MS_POSIXACL; + fsopt->sb_flags |= SB_POSIXACL; break; #endif case Opt_noacl: - fsopt->sb_flags &= ~MS_POSIXACL; + fsopt->sb_flags &= ~SB_POSIXACL; break; default: BUG_ON(token); @@ -520,7 +520,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",nopoolperm"); #ifdef CONFIG_CEPH_FS_POSIX_ACL - if (fsopt->sb_flags & MS_POSIXACL) + if (fsopt->sb_flags & SB_POSIXACL) seq_puts(m, ",acl"); else seq_puts(m, ",noacl"); @@ -988,7 +988,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, dout("ceph_mount\n"); #ifdef CONFIG_CEPH_FS_POSIX_ACL - flags |= MS_POSIXACL; + flags |= SB_POSIXACL; #endif err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); if (err < 0) { diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index cbd216b572390..350fa55a1bf79 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -42,7 +42,7 @@ #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ -#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ +#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of SB_POSIXACL in mnt_cifs_flags */ #define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */ #define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */ #define CIFS_MOUNT_MAP_SFM_CHR 0x800000 /* SFM/MAC mapping for illegal chars */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8c8b75d33f310..31b7565b16175 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -125,7 +125,7 @@ cifs_read_super(struct super_block *sb) tcon = cifs_sb_master_tcon(cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files) sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -497,7 +497,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",cifsacl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) seq_puts(s, ",dynperm"); - if (root->d_sb->s_flags & MS_POSIXACL) + if (root->d_sb->s_flags & SB_POSIXACL) seq_puts(s, ",acl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) seq_puts(s, ",mfsymlinks"); @@ -573,7 +573,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) static int cifs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; return 0; } @@ -708,7 +708,7 @@ cifs_do_mount(struct file_system_type *fs_type, rc = cifs_mount(cifs_sb, volume_info); if (rc) { - if (!(flags & MS_SILENT)) + if (!(flags & SB_SILENT)) cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", rc); root = ERR_PTR(rc); @@ -720,7 +720,7 @@ cifs_do_mount(struct file_system_type *fs_type, mnt_data.flags = flags; /* BB should we make this contingent on mount parm? */ - flags |= MS_NODIRATIME | MS_NOATIME; + flags |= SB_NODIRATIME | SB_NOATIME; sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data); if (IS_ERR(sb)) { @@ -739,7 +739,7 @@ cifs_do_mount(struct file_system_type *fs_type, goto out_super; } - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; } root = cifs_get_root(volume_info, sb); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e185b2853eab7..b16583594d1ad 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -559,8 +559,8 @@ struct smb_vol { CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \ CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID) -#define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \ - MS_NODEV | MS_SYNCHRONOUS) +#define CIFS_MS_MASK (SB_RDONLY | SB_MANDLOCK | SB_NOEXEC | SB_NOSUID | \ + SB_NODEV | SB_SYNCHRONOUS) struct cifs_mnt_data { struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7c732cb441641..ecb99079363ab 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -985,7 +985,7 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) } cifs_fattr_to_inode(inode, fattr); - if (sb->s_flags & MS_NOATIME) + if (sb->s_flags & SB_NOATIME) inode->i_flags |= S_NOATIME | S_NOCMTIME; if (inode->i_state & I_NEW) { inode->i_ino = hash; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 52f975d848a07..316af84674f11 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -117,7 +117,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, #ifdef CONFIG_CIFS_POSIX if (!value) goto out; - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, value, (const int)size, ACL_TYPE_ACCESS, cifs_sb->local_nls, @@ -129,7 +129,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, #ifdef CONFIG_CIFS_POSIX if (!value) goto out; - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, value, (const int)size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, @@ -266,7 +266,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, case XATTR_ACL_ACCESS: #ifdef CONFIG_CIFS_POSIX - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, value, size, ACL_TYPE_ACCESS, cifs_sb->local_nls, @@ -276,7 +276,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, case XATTR_ACL_DEFAULT: #ifdef CONFIG_CIFS_POSIX - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, value, size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6f0a6a4d5faa9..97424cf206c08 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -96,7 +96,7 @@ void coda_destroy_inodecache(void) static int coda_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; return 0; } @@ -188,7 +188,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) mutex_unlock(&vc->vc_mutex); sb->s_fs_info = vc; - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; sb->s_blocksize = 4096; /* XXXXX what do we put here?? */ sb->s_blocksize_bits = 12; sb->s_magic = CODA_SUPER_MAGIC; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 9a2ab419ba624..017b0ab19bc4d 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -505,7 +505,7 @@ static void cramfs_kill_sb(struct super_block *sb) static int cramfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -592,7 +592,7 @@ static int cramfs_finalize_super(struct super_block *sb, struct inode *root; /* Set it all up.. */ - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_op = &cramfs_ops; root = get_cramfs_inode(sb, cramfs_root, 0); if (IS_ERR(root)) diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index f2677c90d96e1..025d66a705db6 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -560,8 +560,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags * Set the POSIX ACL flag based on whether they're enabled in the lower * mount. */ - s->s_flags = flags & ~MS_POSIXACL; - s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL; + s->s_flags = flags & ~SB_POSIXACL; + s->s_flags |= path.dentry->d_sb->s_flags & SB_POSIXACL; /** * Force a read-only eCryptfs mount when: @@ -569,7 +569,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags * 2) The ecryptfs_encrypted_view mount option is specified */ if (sb_rdonly(path.dentry->d_sb) || mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; @@ -602,7 +602,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags ecryptfs_set_dentry_private(s->s_root, root_info); root_info->lower_path = path; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; return dget(s->s_root); out_free: diff --git a/fs/efs/super.c b/fs/efs/super.c index 65b59009555b6..6ffb7ba1547a6 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -116,7 +116,7 @@ static void destroy_inodecache(void) static int efs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -311,7 +311,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) #ifdef DEBUG pr_info("forcing read-only mode\n"); #endif - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; } s->s_op = &efs_superblock_operations; s->s_export_op = &efs_export_ops; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index e1b3724bebf23..33db13365c5eb 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -548,7 +548,7 @@ void ext2_free_blocks (struct inode * inode, unsigned long block, } mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); group_adjust_blocks(sb, block_group, desc, bh2, group_freed); @@ -1424,7 +1424,7 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, percpu_counter_sub(&sbi->s_freeblocks_counter, num); mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); *errp = 0; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index a1fc3dabca41b..6484199b35d1e 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -145,7 +145,7 @@ void ext2_free_inode (struct inode * inode) else ext2_release_inode(sb, block_group, is_directory); mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); brelse(bitmap_bh); @@ -517,7 +517,7 @@ struct inode *ext2_new_inode(struct inode *dir, umode_t mode, goto fail; got: mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); brelse(bitmap_bh); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e2b6be03e69b5..7646818ab266f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -75,7 +75,7 @@ void ext2_error(struct super_block *sb, const char *function, if (test_opt(sb, ERRORS_RO)) { ext2_msg(sb, KERN_CRIT, "error: remounting filesystem read-only"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } } @@ -656,7 +656,7 @@ static int ext2_setup_super (struct super_block * sb, ext2_msg(sb, KERN_ERR, "error: revision level too high, " "forcing read-only mode"); - res = MS_RDONLY; + res = SB_RDONLY; } if (read_only) return res; @@ -924,9 +924,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_resuid = opts.s_resuid; sbi->s_resgid = opts.s_resgid; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? - MS_POSIXACL : 0); + SB_POSIXACL : 0); sb->s_iflags |= SB_I_CGROUPWB; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && @@ -1178,7 +1178,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_msg(sb, KERN_WARNING, "warning: mounting ext3 filesystem as ext2"); if (ext2_setup_super (sb, es, sb_rdonly(sb))) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ext2_write_super(sb); return 0; @@ -1341,9 +1341,9 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) "dax flag with busy inodes while remounting"); new_opts.s_mount_opt ^= EXT2_MOUNT_DAX; } - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out_set; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || !(sbi->s_mount_state & EXT2_VALID_FS)) goto out_set; @@ -1379,7 +1379,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) */ sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext2_setup_super (sb, es, 0)) - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; spin_unlock(&sbi->s_lock); ext2_write_super(sb); @@ -1392,8 +1392,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_opt = new_opts.s_mount_opt; sbi->s_resuid = new_opts.s_resuid; sbi->s_resgid = new_opts.s_resgid; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); spin_unlock(&sbi->s_lock); return 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0992d76f7ab15..7df2c5644e59c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2742,7 +2742,7 @@ static int ext4_writepages(struct address_space *mapping, * If the filesystem has aborted, it is read-only, so return * right away instead of dumping stack traces later on that * will obscure the real source of the problem. We test - * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because + * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because * the latter could be true if the filesystem is mounted * read-only, and in that case, ext4_writepages should * *never* be called, so if that ever happens, we would want @@ -5183,7 +5183,7 @@ static int ext4_do_update_inode(handle_t *handle, ext4_inode_csum_set(inode, raw_inode, ei); spin_unlock(&ei->i_raw_lock); - if (inode->i_sb->s_flags & MS_LAZYTIME) + if (inode->i_sb->s_flags & SB_LAZYTIME) ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, bh->b_data); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0556cd036b69e..7c46693a14d76 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -422,7 +422,7 @@ static void ext4_handle_error(struct super_block *sb) * before ->s_flags update */ smp_wmb(); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } if (test_opt(sb, ERRORS_PANIC)) { if (EXT4_SB(sb)->s_journal && @@ -635,7 +635,7 @@ void __ext4_abort(struct super_block *sb, const char *function, * before ->s_flags update */ smp_wmb(); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; if (EXT4_SB(sb)->s_journal) jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); @@ -1682,10 +1682,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, sb->s_flags |= SB_I_VERSION; return 1; case Opt_lazytime: - sb->s_flags |= MS_LAZYTIME; + sb->s_flags |= SB_LAZYTIME; return 1; case Opt_nolazytime: - sb->s_flags &= ~MS_LAZYTIME; + sb->s_flags &= ~SB_LAZYTIME; return 1; } @@ -2116,7 +2116,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { ext4_msg(sb, KERN_ERR, "revision level too high, " "forcing read-only mode"); - res = MS_RDONLY; + res = SB_RDONLY; } if (read_only) goto done; @@ -2429,7 +2429,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { /* don't clear list on RO mount w/ errors */ - if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { + if (es->s_last_orphan && !(s_flags & SB_RDONLY)) { ext4_msg(sb, KERN_INFO, "Errors on filesystem, " "clearing orphan list.\n"); es->s_last_orphan = 0; @@ -2438,19 +2438,19 @@ static void ext4_orphan_cleanup(struct super_block *sb, return; } - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; } #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; /* * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. */ - if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) { + if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) { int ret = ext4_enable_quotas(sb); if (!ret) @@ -2539,7 +2539,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } } #endif - sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sb->s_flags = s_flags; /* Restore SB_RDONLY status */ } /* @@ -2741,7 +2741,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) if (ext4_has_feature_readonly(sb)) { ext4_msg(sb, KERN_INFO, "filesystem is read-only"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; return 1; } @@ -3623,8 +3623,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_iflags |= SB_I_CGROUPWB; } - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && (ext4_has_compat_features(sb) || @@ -4199,7 +4199,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (ext4_setup_super(sb, es, sb_rdonly(sb))) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* determine the minimum size of new large inodes, if present */ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && @@ -4693,7 +4693,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) * the clock is set in the future, and this will cause e2fsck * to complain and force a full file system check. */ - if (!(sb->s_flags & MS_RDONLY)) + if (!(sb->s_flags & SB_RDONLY)) es->s_wtime = cpu_to_le32(get_seconds()); if (sb->s_bdev->bd_part) es->s_kbytes_written = @@ -5047,8 +5047,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, "Abort forced by user"); - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); es = sbi->s_es; @@ -5057,16 +5057,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } - if (*flags & MS_LAZYTIME) - sb->s_flags |= MS_LAZYTIME; + if (*flags & SB_LAZYTIME) + sb->s_flags |= SB_LAZYTIME; - if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; goto restore_opts; } - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { err = sync_filesystem(sb); if (err < 0) goto restore_opts; @@ -5078,7 +5078,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * First of all, the unconditional stuff we have to do * to disable replay of the journal when we next remount */ - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* * OK, test if we are remounting a valid rw partition @@ -5140,7 +5140,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_clear_journal_err(sb, es); sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext4_setup_super(sb, es, 0)) - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; if (ext4_has_feature_mmp(sb)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) { @@ -5164,7 +5164,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } ext4_setup_system_zone(sb); - if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) + if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA @@ -5182,7 +5182,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } #endif - *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); + *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); return 0; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd2e73e10857a..4aa69bc1c70af 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -617,17 +617,17 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); - sbi->sb->s_flags &= ~MS_RDONLY; + sbi->sb->s_flags &= ~SB_RDONLY; } #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - sbi->sb->s_flags |= MS_ACTIVE; + sbi->sb->s_flags |= SB_ACTIVE; /* Turn on quotas so that they are updated correctly */ - quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); @@ -658,7 +658,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) if (quota_enabled) f2fs_quota_off_umount(sbi->sb); #endif - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f4e094e816c63..6abf26c31d018 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2378,7 +2378,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) static inline int f2fs_readonly(struct super_block *sb) { - return sb->s_flags & MS_RDONLY; + return sb->s_flags & SB_RDONLY; } static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5d5bba462f263..d844dcb805703 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1005,7 +1005,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, cpc.reason = __get_cp_reason(sbi); gc_more: - if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { + if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) { ret = -EINVAL; goto stop; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 92c57ace1939b..b3a14b0429f23 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -598,16 +598,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int quota_enabled; #endif - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); - sbi->sb->s_flags &= ~MS_RDONLY; + sbi->sb->s_flags &= ~SB_RDONLY; } #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - sbi->sb->s_flags |= MS_ACTIVE; + sbi->sb->s_flags |= SB_ACTIVE; /* Turn on quotas so that they are updated correctly */ - quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); #endif fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", @@ -671,7 +671,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) if (quota_enabled) f2fs_quota_off_umount(sbi->sb); #endif - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return ret ? ret: err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6c5dd450002d..708155d9c2e42 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -534,10 +534,10 @@ static int parse_options(struct super_block *sb, char *options) #endif break; case Opt_lazytime: - sb->s_flags |= MS_LAZYTIME; + sb->s_flags |= SB_LAZYTIME; break; case Opt_nolazytime: - sb->s_flags &= ~MS_LAZYTIME; + sb->s_flags &= ~SB_LAZYTIME; break; #ifdef CONFIG_QUOTA case Opt_quota: @@ -1168,7 +1168,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_DENTRY); set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); - sbi->sb->s_flags |= MS_LAZYTIME; + sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); if (f2fs_sb_mounted_blkzoned(sbi->sb)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); @@ -1236,7 +1236,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #endif /* recover superblocks we couldn't write due to previous RO mount */ - if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { + if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); f2fs_msg(sb, KERN_INFO, "Try to recover all the superblocks, ret: %d", err); @@ -1255,17 +1255,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. */ - if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) + if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) goto skip; #ifdef CONFIG_QUOTA - if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { + if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; } else { /* dquot_resume needs RW */ - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { dquot_resume(sb, -1); } else if (f2fs_sb_has_quota_ino(sb)) { @@ -1288,7 +1288,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) { if (sbi->gc_thread) { stop_gc_thread(sbi); need_restart_gc = true; @@ -1300,7 +1300,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1314,7 +1314,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ - if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); destroy_flush_cmd_control(sbi, false); } else { @@ -1329,8 +1329,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) kfree(s_qf_names[i]); #endif /* Update the POSIXACL Flag */ - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); return 0; restore_gc: @@ -2472,8 +2472,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; sb->s_time_gran = 1; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); /* init f2fs-specific super block info */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 48b2336692f9f..bac10de678cc9 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -392,7 +392,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); mark_buffer_dirty_inode(c_bh, sbi->fat_inode); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); if (err) @@ -597,7 +597,7 @@ int fat_free_clusters(struct inode *inode, int cluster) } if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { - if (sb->s_flags & MS_SYNCHRONOUS) { + if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; @@ -612,7 +612,7 @@ int fat_free_clusters(struct inode *inode, int cluster) fat_collect_bhs(bhs, &nr_bhs, &fatent); } while (cluster != FAT_ENT_EOF); - if (sb->s_flags & MS_SYNCHRONOUS) { + if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 30c52394a7adb..016c46b5e44c9 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -781,12 +781,12 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) { int new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); - *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME); sync_filesystem(sb); /* make sure we update state on remount. */ - new_rdonly = *flags & MS_RDONLY; + new_rdonly = *flags & SB_RDONLY; if (new_rdonly != sb_rdonly(sb)) { if (new_rdonly) fat_set_state(sb, 0, 0); @@ -1352,7 +1352,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, if (opts->unicode_xlate) opts->utf8 = 0; if (opts->nfs == FAT_NFS_NOSTALE_RO) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_export_op = &fat_export_ops_nostale; } @@ -1608,7 +1608,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, return -ENOMEM; sb->s_fs_info = sbi; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; sb->s_magic = MSDOS_SUPER_MAGIC; sb->s_op = &fat_sops; sb->s_export_op = &fat_export_ops; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index acc3aa30ee549..f9bdc1e01c98e 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -33,7 +33,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) if (opts->errors == FAT_ERRORS_PANIC) panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); else if (opts->errors == FAT_ERRORS_RO && !sb_rdonly(sb)) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; fat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); } } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 7d6a105d601b5..d24d2758a3632 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -646,7 +646,7 @@ static void setup(struct super_block *sb) { MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations; sb->s_d_op = &msdos_dentry_operations; - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; } static int msdos_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 455ce5b77e9bf..f989efa051a0d 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -116,7 +116,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) static int vxfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -220,7 +220,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) int ret = -EINVAL; u32 j; - sbp->s_flags |= MS_RDONLY; + sbp->s_flags |= SB_RDONLY; infp = kzalloc(sizeof(*infp), GFP_KERNEL); if (!infp) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 08f5debd07d10..cea4836385b72 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -490,7 +490,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); - if (!(inode->i_sb->s_flags & MS_ACTIVE) || + if (!(inode->i_sb->s_flags & SB_ACTIVE) || inode->i_state & (I_WB_SWITCH | I_FREEING) || inode_to_wb(inode) == isw->new_wb) { spin_unlock(&inode->i_lock); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2f504d615d923..624f18bbfd2b3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -130,7 +130,7 @@ static void fuse_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - if (inode->i_sb->s_flags & MS_ACTIVE) { + if (inode->i_sb->s_flags & SB_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); @@ -141,7 +141,7 @@ static void fuse_evict_inode(struct inode *inode) static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if (*flags & MS_MANDLOCK) + if (*flags & SB_MANDLOCK) return -EINVAL; return 0; @@ -1056,10 +1056,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) int is_bdev = sb->s_bdev != NULL; err = -EINVAL; - if (sb->s_flags & MS_MANDLOCK) + if (sb->s_flags & SB_MANDLOCK) goto err; - sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION); + sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); if (!parse_fuse_opt(data, &d, is_bdev)) goto err; @@ -1109,9 +1109,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_dev_free; /* Handle umasking inside the fuse code */ - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) fc->dont_mask = 1; - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; fc->default_permissions = d.default_permissions; fc->allow_other = d.allow_other; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a3711f5434052..ad55eb86a2504 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1065,15 +1065,15 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sdp->sd_args = *args; if (sdp->sd_args.ar_spectator) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; set_bit(SDF_RORECOVERY, &sdp->sd_flags); } if (sdp->sd_args.ar_posix_acl) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; if (sdp->sd_args.ar_nobarrier) set_bit(SDF_NOBARRIERS, &sdp->sd_flags); - sb->s_flags |= MS_NOSEC; + sb->s_flags |= SB_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; sb->s_d_op = &gfs2_dops; @@ -1257,7 +1257,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, struct gfs2_args args; struct gfs2_sbd *sdp; - if (!(flags & MS_RDONLY)) + if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; bdev = blkdev_get_by_path(dev_name, mode, fs_type); @@ -1313,15 +1313,15 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (s->s_root) { error = -EBUSY; - if ((flags ^ s->s_flags) & MS_RDONLY) + if ((flags ^ s->s_flags) & SB_RDONLY) goto error_super; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); + error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0); if (error) goto error_super; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; bdev->bd_super = s; } @@ -1365,7 +1365,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, pr_warn("gfs2 mount does not exist\n"); return ERR_CAST(s); } - if ((flags ^ s->s_flags) & MS_RDONLY) { + if ((flags ^ s->s_flags) & SB_RDONLY) { deactivate_locked_super(s); return ERR_PTR(-EBUSY); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9cb5c9a97d69d..d81d46e197264 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1256,10 +1256,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) return -EINVAL; if (sdp->sd_args.ar_spectator) - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; - if ((sb->s_flags ^ *flags) & MS_RDONLY) { - if (*flags & MS_RDONLY) + if ((sb->s_flags ^ *flags) & SB_RDONLY) { + if (*flags & SB_RDONLY) error = gfs2_make_fs_ro(sdp); else error = gfs2_make_fs_rw(sdp); @@ -1269,9 +1269,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sdp->sd_args = args; if (sdp->sd_args.ar_posix_acl) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; else - sb->s_flags &= ~MS_POSIXACL; + sb->s_flags &= ~SB_POSIXACL; if (sdp->sd_args.ar_nobarrier) set_bit(SDF_NOBARRIERS, &sdp->sd_flags); else diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index a85ca8b2c9ba4..ca8b72d0a8315 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -117,7 +117,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) kfree(tr); up_read(&sdp->sd_log_flush_lock); - if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) + if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS) gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); if (alloced) sb_end_intwrite(sdp->sd_vfs); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 894994d2c8850..460281b1299eb 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -204,11 +204,11 @@ int hfs_mdb_get(struct super_block *sb) attrib = mdb->drAtrb; if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) { pr_warn("filesystem is marked locked, mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } if (!sb_rdonly(sb)) { /* Mark the volume uncleanly unmounted in case we crash */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 7e0d65e9586c7..173876782f73f 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -114,18 +114,18 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + *flags |= SB_NODIRATIME; + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (!(*flags & MS_RDONLY)) { + if (!(*flags & SB_RDONLY)) { if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) { pr_warn("filesystem is marked locked, leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } } return 0; @@ -407,7 +407,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &hfs_super_operations; sb->s_xattr = hfs_xattr_handlers; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; mutex_init(&sbi->bitmap_lock); res = hfs_mdb_get(sb); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index e5bb2de2262ae..1d458b7169572 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -329,9 +329,9 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfsplus_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (!(*flags & MS_RDONLY)) { + if (!(*flags & SB_RDONLY)) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; int force = 0; @@ -340,20 +340,20 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } else if (force) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { pr_warn("filesystem is marked locked, leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { pr_warn("filesystem is marked journaled, leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } } return 0; @@ -455,16 +455,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { pr_warn("Filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { pr_warn("Filesystem is marked locked, mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !sb_rdonly(sb)) { pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } err = -EINVAL; diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index e0e60b1484006..7c49f1ef0c850 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -288,7 +288,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno, goto bail; } if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) { - if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok; + if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & SB_RDONLY) goto ok; hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 1516fb4e28f40..c45a3b9b9ac7e 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -78,7 +78,7 @@ void hpfs_error(struct super_block *s, const char *fmt, ...) else { pr_cont("; remounting read-only\n"); mark_dirty(s, 0); - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; } } else if (sb_rdonly(s)) pr_cont("; going on - but anything won't be destroyed because it's read-only\n"); @@ -457,7 +457,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) sync_filesystem(s); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; @@ -488,7 +488,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk; sbi->sb_err = errs; sbi->sb_timeshift = timeshift; - if (!(*flags & MS_RDONLY)) mark_dirty(s, 1); + if (!(*flags & SB_RDONLY)) mark_dirty(s, 1); hpfs_unlock(s); return 0; @@ -614,7 +614,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) goto bail4; } - s->s_flags |= MS_NOATIME; + s->s_flags |= SB_NOATIME; /* Fill superblock stuff */ s->s_magic = HPFS_SUPER_MAGIC; diff --git a/fs/inode.c b/fs/inode.c index fd401028a309e..03102d6ef044d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -416,7 +416,7 @@ void inode_add_lru(struct inode *inode) { if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) && - !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) + !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE) inode_lru_list_add(inode); } @@ -595,7 +595,7 @@ static void dispose_list(struct list_head *head) * @sb: superblock to operate on * * Make sure that no inodes with zero refcount are retained. This is - * called by superblock shutdown after having MS_ACTIVE flag removed, + * called by superblock shutdown after having SB_ACTIVE flag removed, * so any inode reaching zero refcount during or after that call will * be immediately evicted. */ @@ -1492,7 +1492,7 @@ static void iput_final(struct inode *inode) else drop = generic_drop_inode(inode); - if (!drop && (sb->s_flags & MS_ACTIVE)) { + if (!drop && (sb->s_flags & SB_ACTIVE)) { inode_add_lru(inode); spin_unlock(&inode->i_lock); return; @@ -1644,7 +1644,7 @@ int generic_update_time(struct inode *inode, struct timespec *time, int flags) if (flags & S_MTIME) inode->i_mtime = *time; - if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION)) + if (!(inode->i_sb->s_flags & SB_LAZYTIME) || (flags & S_VERSION)) iflags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, iflags); return 0; @@ -1691,7 +1691,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode, if (IS_NOATIME(inode)) return false; - if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) return false; if (mnt->mnt_flags & MNT_NOATIME) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 447a24d77b894..bc258a4402f6a 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -114,7 +114,7 @@ static void destroy_inodecache(void) static int isofs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if (!(*flags & MS_RDONLY)) + if (!(*flags & SB_RDONLY)) return -EROFS; return 0; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e96c6b05e43e7..d8c274d39ddb9 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -409,10 +409,10 @@ int jffs2_do_remount_fs(struct super_block *sb, int *flags, char *data) mutex_unlock(&c->alloc_sem); } - if (!(*flags & MS_RDONLY)) + if (!(*flags & SB_RDONLY)) jffs2_start_garbage_collect_thread(c); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; return 0; } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 824e61ede465f..c2fbec19c6167 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -59,7 +59,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) } -#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY) +#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & SB_RDONLY) #define SECTOR_ADDR(x) ( (((unsigned long)(x) / c->sector_size) * c->sector_size) ) #ifndef CONFIG_JFFS2_FS_WRITEBUFFER diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 153f1c6eb1693..f60dee7faf037 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -301,10 +301,10 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &jffs2_super_operations; sb->s_export_op = &jffs2_export_ops; - sb->s_flags = sb->s_flags | MS_NOATIME; + sb->s_flags = sb->s_flags | SB_NOATIME; sb->s_xattr = jffs2_xattr_handlers; #ifdef CONFIG_JFFS2_FS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif ret = jffs2_do_fill_super(sb, data, silent); return ret; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 2f7b3af5b8b7a..90373aebfdca1 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -87,7 +87,7 @@ static void jfs_handle_error(struct super_block *sb) else if (sbi->flag & JFS_ERR_REMOUNT_RO) { jfs_err("ERROR: (device %s): remounting filesystem as read-only", sb->s_id); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* nothing is done for continue beyond marking the superblock dirty */ @@ -477,7 +477,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) return rc; } - if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { + if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) { /* * Invalidate any previously read metadata. fsck may have * changed the on-disk data since we mounted r/o @@ -488,12 +488,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) ret = jfs_mount_rw(sb, 1); /* mark the fs r/w for quota activity */ - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; dquot_resume(sb, -1); return ret; } - if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { + if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) { rc = dquot_suspend(sb, -1); if (rc < 0) return rc; @@ -545,7 +545,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sbi->flag = flag; #ifdef CONFIG_JFS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif if (newLVSize) { diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 95a7c88baed9d..26dd9a50f3838 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -335,7 +335,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, deactivate_locked_super(sb); return ERR_PTR(error); } - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; mutex_lock(&kernfs_mutex); list_add(&info->node, &root->supers); diff --git a/fs/libfs.c b/fs/libfs.c index 3aabe553fc450..7ff3cb904acdf 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -246,7 +246,7 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); - s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, + s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER, &init_user_ns, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -277,7 +277,7 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_d_op = dops; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; return dget(s->s_root); Enomem: @@ -578,7 +578,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); - mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL); + mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); diff --git a/fs/locks.c b/fs/locks.c index 1bd71c4d663a8..21b4dfa289eea 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -141,7 +141,7 @@ static inline bool is_remote_lock(struct file *filp) { - return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK)); + return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK)); } static bool lease_breaking(struct file_lock *fl) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index b6829d6796432..72e308c3e66b9 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -125,9 +125,9 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) sync_filesystem(sb); ms = sbi->s_ms; - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { if (ms->s_state & MINIX_VALID_FS || !(sbi->s_mount_state & MINIX_VALID_FS)) return 0; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 129f1937fa2c1..41de88cdc053f 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -103,7 +103,7 @@ static void destroy_inodecache(void) static int ncp_remount(struct super_block *sb, int *flags, char* data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; return 0; } @@ -547,7 +547,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) else default_bufsize = 1024; - sb->s_flags |= MS_NODIRATIME; /* probably even noatime */ + sb->s_flags |= SB_NODIRATIME; /* probably even noatime */ sb->s_maxbytes = 0xFFFFFFFFU; sb->s_blocksize = 1024; /* Eh... Is this correct? */ sb->s_blocksize_bits = 10; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e51ae52ed14ff..2f3f86726f5b9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1256,7 +1256,7 @@ static int nfs_dentry_delete(const struct dentry *dentry) /* Unhash it, so that ->d_iput() would be called */ return 1; } - if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { + if (!(dentry->d_sb->s_flags & SB_ACTIVE)) { /* Unhash it, so that ancestors of killed async unlink * files will be cleaned up during umount */ return 1; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 38b93d54c02e2..b992d2382ffa3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -752,7 +752,7 @@ int nfs_getattr(const struct path *path, struct kstat *stat, * Note that we only have to check the vfsmount flags here: * - NFS always sets S_NOATIME by so checking it would give a * bogus result - * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is + * - NFS never sets SB_NOATIME or SB_NODIRATIME so there is * no point in checking those. */ if ((path->mnt->mnt_flags & MNT_NOATIME) || diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5ab17fd4700a6..8357ff69962f2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -10,7 +10,7 @@ #include #include -#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) +#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 43cadb28db6ef..29bacdc56f6a9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -813,9 +813,9 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) */ seq_printf(m, "\n\topts:\t"); seq_puts(m, sb_rdonly(root->d_sb) ? "ro" : "rw"); - seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); - seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : ""); - seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + seq_puts(m, root->d_sb->s_flags & SB_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, root->d_sb->s_flags & SB_NOATIME ? ",noatime" : ""); + seq_puts(m, root->d_sb->s_flags & SB_NODIRATIME ? ",nodiratime" : ""); nfs_show_mount_options(m, nfss, 1); seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); @@ -2296,11 +2296,11 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) /* * noac is a special case. It implies -o sync, but that's not * necessarily reflected in the mtab options. do_remount_sb - * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the + * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the * remount options, so we have to explicitly reset it. */ if (data->flags & NFS_MOUNT_NOAC) - *flags |= MS_SYNCHRONOUS; + *flags |= SB_SYNCHRONOUS; /* compare new mount options with old ones */ error = nfs_compare_remount_data(nfss, data); @@ -2349,7 +2349,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; sb->s_time_gran = 1; sb->s_export_op = &nfs_export_ops; } @@ -2379,7 +2379,7 @@ static void nfs_clone_super(struct super_block *sb, /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; } nfs_initialise_sb(sb); @@ -2600,11 +2600,11 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; + sb_mntdata.mntflags |= SB_SYNCHRONOUS; if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) - if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; + if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS) + sb_mntdata.mntflags |= SB_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); @@ -2641,7 +2641,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, if (error) goto error_splat_root; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; out: return mntroot; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index f572538dcc4f5..9f3ffba41533e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1979,7 +1979,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_inode_info *ii, *n; - int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE); + int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE); int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3ce20cd44a209..3073b646e1bac 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -141,7 +141,7 @@ void __nilfs_error(struct super_block *sb, const char *function, if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } } @@ -869,7 +869,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, /* FS independent flags */ #ifdef NILFS_ATIME_DISABLE - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; #endif nilfs_set_default_options(sb, sbp); @@ -1133,7 +1133,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) err = -EINVAL; goto restore_opts; } - sb->s_flags = (sb->s_flags & ~MS_POSIXACL); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL); err = -EINVAL; @@ -1143,12 +1143,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { /* Shutting down log writer */ nilfs_detach_log_writer(sb); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* * Remounting a valid RW partition RDONLY, so set @@ -1178,7 +1178,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); @@ -1212,7 +1212,7 @@ static int nilfs_parse_snapshot_option(const char *option, const char *msg = NULL; int err; - if (!(sd->flags & MS_RDONLY)) { + if (!(sd->flags & SB_RDONLY)) { msg = "read-only option is not specified"; goto parse_error; } @@ -1286,7 +1286,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, struct dentry *root_dentry; int err, s_new = false; - if (!(flags & MS_RDONLY)) + if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); @@ -1327,14 +1327,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags, snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev); sb_set_blocksize(s, block_size(sd.bdev)); - err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (err) goto failed_super; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; } else if (!sd.cno) { if (nilfs_tree_is_busy(s->s_root)) { - if ((flags ^ s->s_flags) & MS_RDONLY) { + if ((flags ^ s->s_flags) & SB_RDONLY) { nilfs_msg(s, KERN_ERR, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index afebb5067cec8..1a85317e83f0f 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -220,7 +220,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) if (!valid_fs) { nilfs_msg(sb, KERN_WARNING, "mounting unchecked fs"); - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { nilfs_msg(sb, KERN_INFO, "recovery required for readonly filesystem"); nilfs_msg(sb, KERN_INFO, @@ -286,7 +286,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) if (valid_fs) goto skip_recovery; - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { __u64 features; if (nilfs_test_opt(nilfs, NORECOVERY)) { @@ -309,7 +309,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) err = -EROFS; goto failed_unload; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; } else if (nilfs_test_opt(nilfs, NORECOVERY)) { nilfs_msg(sb, KERN_ERR, "recovery cancelled because norecovery option was specified for a read/write mount"); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 81d8959b6aef9..219b269c737e6 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -67,7 +67,7 @@ void fsnotify_unmount_inodes(struct super_block *sb) /* * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually + * doing an __iget/iput with SB_ACTIVE clear would actually * evict all inodes with zero i_count from icache which is * unnecessarily violent and may in fact be illegal to do. */ diff --git a/fs/nsfs.c b/fs/nsfs.c index ef243e14b6ebd..7c6f76d29f564 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -255,5 +255,5 @@ void __init nsfs_init(void) nsfs_mnt = kern_mount(&nsfs); if (IS_ERR(nsfs_mnt)) panic("can't set nsfs up\n"); - nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; + nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 3f70f041dbe9d..bb7159f697f2f 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -473,7 +473,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) #ifndef NTFS_RW /* For read-only compiled driver, enforce read-only flag. */ - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; #else /* NTFS_RW */ /* * For the read-write compiled driver, if we are remounting read-write, @@ -487,7 +487,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) * When remounting read-only, mark the volume clean if no volume errors * have occurred. */ - if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { + if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) { static const char *es = ". Cannot remount read-write."; /* Remounting read-write. */ @@ -548,7 +548,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) NVolSetErrors(vol); return -EROFS; } - } else if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { + } else if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) { /* Remounting read-only. */ if (!NVolErrors(vol)) { if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) @@ -1799,7 +1799,7 @@ static bool load_system_files(ntfs_volume *vol) es3); goto iput_mirr_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", !vol->mftmirr_ino ? es1 : es2, es3); } else @@ -1937,7 +1937,7 @@ static bool load_system_files(ntfs_volume *vol) es1, es2); goto iput_vol_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1974,7 +1974,7 @@ static bool load_system_files(ntfs_volume *vol) } goto iput_logfile_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2019,7 +2019,7 @@ static bool load_system_files(ntfs_volume *vol) es1, es2); goto iput_root_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2042,7 +2042,7 @@ static bool load_system_files(ntfs_volume *vol) goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* * Do not set NVolErrors() because ntfs_remount() might manage * to set the dirty flag in which case all would be well. @@ -2055,7 +2055,7 @@ static bool load_system_files(ntfs_volume *vol) * If (still) a read-write mount, set the NT4 compatibility flag on * newer NTFS version volumes. */ - if (!(sb->s_flags & MS_RDONLY) && (vol->major_ver > 1) && + if (!(sb->s_flags & SB_RDONLY) && (vol->major_ver > 1) && ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) { static const char *es1 = "Failed to set NT4 compatibility flag"; static const char *es2 = ". Run chkdsk."; @@ -2069,7 +2069,7 @@ static bool load_system_files(ntfs_volume *vol) goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } #endif @@ -2087,7 +2087,7 @@ static bool load_system_files(ntfs_volume *vol) goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ @@ -2128,7 +2128,7 @@ static bool load_system_files(ntfs_volume *vol) es1, es2); goto iput_quota_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2150,7 +2150,7 @@ static bool load_system_files(ntfs_volume *vol) goto iput_quota_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } /* @@ -2171,7 +2171,7 @@ static bool load_system_files(ntfs_volume *vol) es1, es2); goto iput_usnjrnl_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2194,7 +2194,7 @@ static bool load_system_files(ntfs_volume *vol) goto iput_usnjrnl_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ @@ -2728,7 +2728,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) lockdep_off(); ntfs_debug("Entering."); #ifndef NTFS_RW - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; #endif /* ! NTFS_RW */ /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */ sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index dc455d45a66ae..a1d0510554726 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -227,7 +227,7 @@ int ocfs2_should_update_atime(struct inode *inode, return 0; if ((inode->i_flags & S_NOATIME) || - ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) + ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))) return 0; /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 040bbb6a6e4b8..80efa5699fb0c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -675,9 +675,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) } /* We're going to/from readonly mode. */ - if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { /* Disable quota accounting before remounting RO */ - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { ret = ocfs2_susp_quotas(osb, 0); if (ret < 0) goto out; @@ -691,8 +691,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto unlock_osb; } - if (*flags & MS_RDONLY) { - sb->s_flags |= MS_RDONLY; + if (*flags & SB_RDONLY) { + sb->s_flags |= SB_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; } else { if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { @@ -709,14 +709,14 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) ret = -EINVAL; goto unlock_osb; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; } trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags); unlock_osb: spin_unlock(&osb->osb_lock); /* Enable quota accounting after remounting RW */ - if (!ret && !(*flags & MS_RDONLY)) { + if (!ret && !(*flags & SB_RDONLY)) { if (sb_any_quota_suspended(sb)) ret = ocfs2_susp_quotas(osb, 1); else @@ -724,7 +724,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) if (ret < 0) { /* Return back changes... */ spin_lock(&osb->osb_lock); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; spin_unlock(&osb->osb_lock); goto out; @@ -744,9 +744,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) if (!ocfs2_is_hard_readonly(osb)) ocfs2_set_journal_params(osb); - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? - MS_POSIXACL : 0); + SB_POSIXACL : 0); } out: return ret; @@ -1057,10 +1057,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; - sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) | - ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~(SB_POSIXACL | SB_NOSEC)) | + ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); - /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, + /* Hard readonly mode only if: bdev_read_only, SB_RDONLY, * heartbeat=none */ if (bdev_read_only(sb->s_bdev)) { if (!sb_rdonly(sb)) { @@ -2057,7 +2057,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb->s_xattr = ocfs2_xattr_handlers; sb->s_time_gran = 1; - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; /* this is needed to support O_LARGEFILE */ cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); @@ -2568,7 +2568,7 @@ static int ocfs2_handle_error(struct super_block *sb) return rv; pr_crit("OCFS2: File system is now read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ocfs2_set_ro_flag(osb, 0); } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 5fdf269ba82e3..c5898c59d4118 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -901,7 +901,7 @@ static int ocfs2_xattr_list_entry(struct super_block *sb, case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: - if (!(sb->s_flags & MS_POSIXACL)) + if (!(sb->s_flags & SB_POSIXACL)) return 0; break; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 13215f26e3219..2200662a9bf18 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -369,7 +369,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) static int openprom_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; return 0; } @@ -386,7 +386,7 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) struct op_inode_info *oi; int ret; - s->s_flags |= MS_NOATIME; + s->s_flags |= SB_NOATIME; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = OPENPROM_SUPER_MAGIC; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 366750eef2019..36f1390b5ed7d 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -40,7 +40,7 @@ static int orangefs_show_options(struct seq_file *m, struct dentry *root) { struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(root->d_sb); - if (root->d_sb->s_flags & MS_POSIXACL) + if (root->d_sb->s_flags & SB_POSIXACL) seq_puts(m, ",acl"); if (orangefs_sb->flags & ORANGEFS_OPT_INTR) seq_puts(m, ",intr"); @@ -60,7 +60,7 @@ static int parse_mount_options(struct super_block *sb, char *options, * Force any potential flags that might be set from the mount * to zero, ie, initialize to unset. */ - sb->s_flags &= ~MS_POSIXACL; + sb->s_flags &= ~SB_POSIXACL; orangefs_sb->flags &= ~ORANGEFS_OPT_INTR; orangefs_sb->flags &= ~ORANGEFS_OPT_LOCAL_LOCK; @@ -73,7 +73,7 @@ static int parse_mount_options(struct super_block *sb, char *options, token = match_token(p, tokens, args); switch (token) { case Opt_acl: - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; break; case Opt_intr: orangefs_sb->flags |= ORANGEFS_OPT_INTR; @@ -507,7 +507,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, ret = orangefs_fill_sb(sb, &new_op->downcall.resp.fs_mount, data, - flags & MS_SILENT ? 1 : 0); + flags & SB_SILENT ? 1 : 0); if (ret) { d = ERR_PTR(ret); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index be03578181d21..288d20f9a55a2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -326,7 +326,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ofs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && ovl_force_readonly(ofs)) + if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) return -EROFS; return 0; @@ -1190,7 +1190,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_err; if (!ofs->workdir) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; @@ -1203,7 +1203,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ofs->upper_mnt) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; else if (ofs->upper_mnt->mnt_sb != ofs->same_sb) ofs->same_sb = NULL; @@ -1213,7 +1213,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_free_oe; if (!ofs->indexdir) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* Show index=off/on in /proc/mounts for any of the reasons above */ @@ -1227,7 +1227,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ovl_super_operations; sb->s_xattr = ovl_xattr_handlers; sb->s_fs_info = ofs; - sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; + sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK; err = -ENOMEM; root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 225f541f7078c..dd0f826224274 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -483,7 +483,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent) /* User space would break if executables or devices appear on proc */ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; - s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; + s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = PROC_SUPER_MAGIC; diff --git a/fs/proc/root.c b/fs/proc/root.c index 4e42aba97f2e3..ede8e64974be2 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -91,7 +91,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, { struct pid_namespace *ns; - if (flags & MS_KERNMOUNT) { + if (flags & SB_KERNMOUNT) { ns = data; data = NULL; } else { diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 7b635d1732137..b786840facd96 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -45,10 +45,10 @@ struct proc_fs_info { static int show_sb_opts(struct seq_file *m, struct super_block *sb) { static const struct proc_fs_info fs_info[] = { - { MS_SYNCHRONOUS, ",sync" }, - { MS_DIRSYNC, ",dirsync" }, - { MS_MANDLOCK, ",mand" }, - { MS_LAZYTIME, ",lazytime" }, + { SB_SYNCHRONOUS, ",sync" }, + { SB_DIRSYNC, ",dirsync" }, + { SB_MANDLOCK, ",mand" }, + { SB_LAZYTIME, ",lazytime" }, { 0, NULL } }; const struct proc_fs_info *fs_infop; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 3a67cfb142d88..3d46fe302fcb1 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -47,7 +47,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) sync_filesystem(sb); qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -199,7 +199,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) s->s_op = &qnx4_sops; s->s_magic = QNX4_SUPER_MAGIC; - s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ + s->s_flags |= SB_RDONLY; /* Yup, read-only yet */ /* Check the superblock signature. Since the qnx4 code is dangerous, we should leave as quickly as possible diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 1192422a1c562..4aeb26bcb4d02 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -56,7 +56,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) static int qnx6_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -427,7 +427,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent) } s->s_op = &qnx6_sops; s->s_magic = QNX6_SUPER_MAGIC; - s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ + s->s_flags |= SB_RDONLY; /* Yup, read-only yet */ /* ease the later tree level calculations */ sbi = QNX6_SB(s); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 11a48affa8824..b13fc024d2eed 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2106,7 +2106,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, journal_end(th); goto out_inserted_sd; } - } else if (inode->i_sb->s_flags & MS_POSIXACL) { + } else if (inode->i_sb->s_flags & SB_POSIXACL) { reiserfs_warning(inode->i_sb, "jdm-13090", "ACLs aren't enabled in the fs, " "but vfs thinks they are!"); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 69ff280bdfe88..70057359fbaf3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1960,7 +1960,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, /* * Cancel flushing of old commits. Note that neither of these works * will be requeued because superblock is being shutdown and doesn't - * have MS_ACTIVE set. + * have SB_ACTIVE set. */ reiserfs_cancel_old_flush(sb); /* wait for all commits to finish */ @@ -4302,7 +4302,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags) * Avoid queueing work when sb is being shut down. Transaction * will be flushed on journal shutdown. */ - if (sb->s_flags & MS_ACTIVE) + if (sb->s_flags & SB_ACTIVE) queue_delayed_work(REISERFS_SB(sb)->commit_wq, &journal->j_work, HZ / 10); } @@ -4393,7 +4393,7 @@ void reiserfs_abort_journal(struct super_block *sb, int errno) if (!journal->j_errno) journal->j_errno = errno; - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; set_bit(J_ABORTED, &journal->j_state); #ifdef CONFIG_REISERFS_CHECK diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 64f49cafbc5bf..7e288d97adcbb 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -390,7 +390,7 @@ void __reiserfs_error(struct super_block *sb, const char *id, return; reiserfs_info(sb, "Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; reiserfs_abort_journal(sb, -EIO); } @@ -409,7 +409,7 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id, error_buf); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; reiserfs_abort_journal(sb, errno); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5464ec517702f..020c9cacbb2f3 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -121,7 +121,7 @@ void reiserfs_schedule_old_flush(struct super_block *s) * Avoid scheduling flush when sb is being shut down. It can race * with journal shutdown and free still queued delayed work. */ - if (sb_rdonly(s) || !(s->s_flags & MS_ACTIVE)) + if (sb_rdonly(s) || !(s->s_flags & SB_ACTIVE)) return; spin_lock(&sbi->old_work_lock); @@ -252,11 +252,11 @@ static int finish_unfinished(struct super_block *s) #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - if (s->s_flags & MS_ACTIVE) { + if (s->s_flags & SB_ACTIVE) { ms_active_set = 0; } else { ms_active_set = 1; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; } /* Turn on quotas so that they are updated correctly */ for (i = 0; i < REISERFS_MAXQUOTAS; i++) { @@ -411,7 +411,7 @@ static int finish_unfinished(struct super_block *s) reiserfs_write_lock(s); if (ms_active_set) /* Restore the flag back */ - s->s_flags &= ~MS_ACTIVE; + s->s_flags &= ~SB_ACTIVE; #endif pathrelse(&path); if (done) @@ -1521,7 +1521,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) goto out_err_unlock; } - if (*mount_flags & MS_RDONLY) { + if (*mount_flags & SB_RDONLY) { reiserfs_write_unlock(s); reiserfs_xattr_init(s, *mount_flags); /* remount read-only */ @@ -1567,7 +1567,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); /* now it is safe to call journal_begin */ - s->s_flags &= ~MS_RDONLY; + s->s_flags &= ~SB_RDONLY; err = journal_begin(&th, s, 10); if (err) goto out_err_unlock; @@ -1575,7 +1575,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) /* Mount a partition which is read-only, read-write */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); - s->s_flags &= ~MS_RDONLY; + s->s_flags &= ~SB_RDONLY; set_sb_umount_state(rs, REISERFS_ERROR_FS); if (!old_format_only(s)) set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); @@ -1590,7 +1590,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) goto out_err_unlock; reiserfs_write_unlock(s); - if (!(*mount_flags & MS_RDONLY)) { + if (!(*mount_flags & SB_RDONLY)) { dquot_resume(s, -1); reiserfs_write_lock(s); finish_unfinished(s); @@ -2055,7 +2055,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) { SWARN(silent, s, "clm-7000", "Detected readonly device, marking FS readonly"); - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; } args.objectid = REISERFS_ROOT_OBJECTID; args.dirid = REISERFS_ROOT_PARENT_OBJECTID; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 46492fb37a4c6..5dbf5324bdda5 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -959,7 +959,7 @@ int reiserfs_lookup_privroot(struct super_block *s) /* * We need to take a copy of the mount flags since things like - * MS_RDONLY don't get set until *after* we're called. + * SB_RDONLY don't get set until *after* we're called. * mount_flags != mount_options */ int reiserfs_xattr_init(struct super_block *s, int mount_flags) @@ -971,7 +971,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) if (err) goto error; - if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) { + if (d_really_is_negative(privroot) && !(mount_flags & SB_RDONLY)) { inode_lock(d_inode(s->s_root)); err = create_privroot(REISERFS_SB(s)->priv_root); inode_unlock(d_inode(s->s_root)); @@ -999,11 +999,11 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt); } - /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ + /* The super_block SB_POSIXACL must mirror the (no)acl mount option. */ if (reiserfs_posixacl(s)) - s->s_flags |= MS_POSIXACL; + s->s_flags |= SB_POSIXACL; else - s->s_flags &= ~MS_POSIXACL; + s->s_flags &= ~SB_POSIXACL; return err; } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 0186fe6d39f3b..8f06fd1f3d692 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -451,7 +451,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int romfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -502,7 +502,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = 0xFFFFFFFF; sb->s_magic = ROMFS_MAGIC; - sb->s_flags |= MS_RDONLY | MS_NOATIME; + sb->s_flags |= SB_RDONLY | SB_NOATIME; sb->s_op = &romfs_super_ops; #ifdef CONFIG_ROMFS_ON_MTD diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index cf01e15a7b16d..8a73b97217c8a 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -195,7 +195,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) (u64) le64_to_cpu(sblk->id_table_start)); sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_op = &squashfs_super_ops; err = -ENOMEM; @@ -373,7 +373,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int squashfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } diff --git a/fs/statfs.c b/fs/statfs.c index b072a8bab71a1..5b2a24f0f263b 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -35,11 +35,11 @@ static int flags_by_mnt(int mnt_flags) static int flags_by_sb(int s_flags) { int flags = 0; - if (s_flags & MS_SYNCHRONOUS) + if (s_flags & SB_SYNCHRONOUS) flags |= ST_SYNCHRONOUS; - if (s_flags & MS_MANDLOCK) + if (s_flags & SB_MANDLOCK) flags |= ST_MANDLOCK; - if (s_flags & MS_RDONLY) + if (s_flags & SB_RDONLY) flags |= ST_RDONLY; return flags; } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 20b8f82e115b6..fb49510c5dcfa 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -30,7 +30,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, void *ns; bool new_sb; - if (!(flags & MS_KERNMOUNT)) { + if (!(flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return ERR_PTR(-EPERM); } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3c47b7d5d4cf8..bec9f79adb25a 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -63,7 +63,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) sync_filesystem(sb); if (sbi->s_forced_ro) - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 0d56e486b3922..89765ddfb738c 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -333,7 +333,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) /* set up enough so that it can read an inode */ sb->s_op = &sysv_sops; if (sbi->s_forced_ro) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; if (sbi->s_truncate) sb->s_d_op = &sysv_dentry_operations; root_inode = sysv_iget(sb, SYSV_ROOT_INO); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a02aa59d1e245..dfe85069586eb 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1406,7 +1406,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, if (flags & S_MTIME) inode->i_mtime = *time; - if (!(inode->i_sb->s_flags & MS_LAZYTIME)) + if (!(inode->i_sb->s_flags & SB_LAZYTIME)) iflags |= I_DIRTY_SYNC; release = ui->dirty; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3be28900bf375..fe77e9625e847 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -84,7 +84,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) if (!c->ro_error) { c->ro_error = 1; c->no_chk_data_crc = 0; - c->vfs_sb->s_flags |= MS_RDONLY; + c->vfs_sb->s_flags |= SB_RDONLY; ubifs_warn(c, "switched to read-only mode, error %d", err); dump_stack(); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7503e7cdf8702..0beb285b143da 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -968,7 +968,7 @@ static int parse_standard_option(const char *option) pr_notice("UBIFS: parse %s\n", option); if (!strcmp(option, "sync")) - return MS_SYNCHRONOUS; + return SB_SYNCHRONOUS; return 0; } @@ -1160,8 +1160,8 @@ static int mount_ubifs(struct ubifs_info *c) size_t sz; c->ro_mount = !!sb_rdonly(c->vfs_sb); - /* Suppress error messages while probing if MS_SILENT is set */ - c->probing = !!(c->vfs_sb->s_flags & MS_SILENT); + /* Suppress error messages while probing if SB_SILENT is set */ + c->probing = !!(c->vfs_sb->s_flags & SB_SILENT); err = init_constants_early(c); if (err) @@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return err; } - if (c->ro_mount && !(*flags & MS_RDONLY)) { + if (c->ro_mount && !(*flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/W due to prior errors"); return -EROFS; @@ -1864,7 +1864,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) err = ubifs_remount_rw(c); if (err) return err; - } else if (!c->ro_mount && (*flags & MS_RDONLY)) { + } else if (!c->ro_mount && (*flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/O due to prior errors"); return -EROFS; @@ -2117,7 +2117,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, */ ubi = open_ubi(name, UBI_READONLY); if (IS_ERR(ubi)) { - if (!(flags & MS_SILENT)) + if (!(flags & SB_SILENT)) pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d", current->pid, name, (int)PTR_ERR(ubi)); return ERR_CAST(ubi); @@ -2143,18 +2143,18 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, kfree(c); /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); - if (!!(flags & MS_RDONLY) != c1->ro_mount) { + if (!!(flags & SB_RDONLY) != c1->ro_mount) { err = -EBUSY; goto out_deact; } } else { - err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); if (err) goto out_deact; /* We do not support atime */ - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; #ifndef CONFIG_UBIFS_ATIME_SUPPORT - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; #else ubifs_msg(c, "full atime support is enabled."); #endif diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 63c7468147eb9..5ee7af879cc41 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1201,7 +1201,7 @@ struct ubifs_debug_info; * @need_recovery: %1 if the file-system needs recovery * @replaying: %1 during journal replay * @mounting: %1 while mounting - * @probing: %1 while attempting to mount if MS_SILENT mount flag is set + * @probing: %1 while attempting to mount if SB_SILENT mount flag is set * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay @@ -1850,7 +1850,7 @@ __printf(2, 3) void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...); /* * A conditional variant of 'ubifs_err()' which doesn't output anything - * if probing (ie. MS_SILENT set). + * if probing (ie. SB_SILENT set). */ #define ubifs_errc(c, fmt, ...) \ do { \ diff --git a/fs/udf/super.c b/fs/udf/super.c index f80e0a0f24d33..f73239a9a97da 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -650,7 +650,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) sync_filesystem(sb); if (lvidiu) { int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); - if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) + if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & SB_RDONLY)) return -EACCES; } @@ -673,10 +673,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) sbi->s_dmode = uopt.dmode; write_unlock(&sbi->s_cred_lock); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out_unlock; - if (*flags & MS_RDONLY) + if (*flags & SB_RDONLY) udf_close_lvid(sb); else udf_open_lvid(sb); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b5cd79065ef9a..e727ee07dbe4a 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -115,7 +115,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -205,7 +205,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { @@ -567,7 +567,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -688,7 +688,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 916b4a4289334..e1ef0f0a13535 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -112,7 +112,7 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -146,14 +146,14 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bh); brelse(bh); } fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); @@ -284,7 +284,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -330,7 +330,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, ts.tv_nsec); mark_buffer_dirty(bh); unlock_buffer(bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bh); brelse(bh); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 6440003f8ddc6..4d497e9c68830 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -282,7 +282,7 @@ void ufs_error (struct super_block * sb, const char * function, usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); ufs_mark_sb_dirty(sb); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } va_start(args, fmt); vaf.fmt = fmt; @@ -320,7 +320,7 @@ void ufs_panic (struct super_block * sb, const char * function, va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; pr_crit("panic (device %s): %s: %pV\n", sb->s_id, function, &vaf); va_end(args); @@ -905,7 +905,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=old is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -921,7 +921,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=nextstep is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -937,7 +937,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=nextstep-cd is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -953,7 +953,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=openstep is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -968,7 +968,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=hp is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; default: @@ -1125,21 +1125,21 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) break; case UFS_FSACTIVE: pr_err("%s(): fs is active\n", __func__); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; break; case UFS_FSBAD: pr_err("%s(): fs is bad\n", __func__); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; break; default: pr_err("%s(): can't grok fs_clean 0x%x\n", __func__, usb1->fs_clean); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; break; } } else { pr_err("%s(): fs needs fsck\n", __func__); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* @@ -1328,7 +1328,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) return -EINVAL; } - if ((bool)(*mount_flags & MS_RDONLY) == sb_rdonly(sb)) { + if ((bool)(*mount_flags & SB_RDONLY) == sb_rdonly(sb)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; mutex_unlock(&UFS_SB(sb)->s_lock); return 0; @@ -1337,7 +1337,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) /* * fs was mouted as rw, remounting ro */ - if (*mount_flags & MS_RDONLY) { + if (*mount_flags & SB_RDONLY) { ufs_put_super_internal(sb); usb1->fs_time = cpu_to_fs32(sb, get_seconds()); if ((flags & UFS_ST_MASK) == UFS_ST_SUN @@ -1346,7 +1346,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); ubh_mark_buffer_dirty (USPI_UBH(uspi)); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } else { /* * fs was mounted as ro, remounting rw @@ -1370,7 +1370,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) mutex_unlock(&UFS_SB(sb)->s_lock); return -EPERM; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 38d4227895aef..a503af96d780e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -781,17 +781,17 @@ xfs_log_mount_finish( * something to an unlinked inode, the irele won't cause * premature truncation and freeing of the inode, which results * in log recovery failure. We have to evict the unreferenced - * lru inodes after clearing MS_ACTIVE because we don't + * lru inodes after clearing SB_ACTIVE because we don't * otherwise clean up the lru if there's a subsequent failure in * xfs_mountfs, which leads to us leaking the inodes if nothing * else (e.g. quotacheck) references the inodes before the * mount failure occurs. */ - mp->m_super->s_flags |= MS_ACTIVE; + mp->m_super->s_flags |= SB_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); - mp->m_super->s_flags &= ~MS_ACTIVE; + mp->m_super->s_flags &= ~SB_ACTIVE; evict_inodes(mp->m_super); /* diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f663022353c0d..5122d3021117f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -212,9 +212,9 @@ xfs_parseargs( */ if (sb_rdonly(sb)) mp->m_flags |= XFS_MOUNT_RDONLY; - if (sb->s_flags & MS_DIRSYNC) + if (sb->s_flags & SB_DIRSYNC) mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) mp->m_flags |= XFS_MOUNT_WSYNC; /* @@ -1312,7 +1312,7 @@ xfs_fs_remount( } /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) { if (mp->m_flags & XFS_MOUNT_NORECOVERY) { xfs_warn(mp, "ro->rw transition prohibited on norecovery mount"); @@ -1368,7 +1368,7 @@ xfs_fs_remount( } /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 5f2f32408011d..fcc5dfc70aa0c 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -30,7 +30,7 @@ extern void xfs_qm_exit(void); #ifdef CONFIG_XFS_POSIX_ACL # define XFS_ACL_STRING "ACLs, " -# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) +# define set_posix_acl_flag(sb) ((sb)->s_flags |= SB_POSIXACL) #else # define XFS_ACL_STRING # define set_posix_acl_flag(sb) do { } while (0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2995a271ec466..bbd92da0946e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1872,7 +1872,7 @@ struct super_operations { */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) -static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & MS_RDONLY; } +static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) diff --git a/include/uapi/linux/bfs_fs.h b/include/uapi/linux/bfs_fs.h index 73445ef07ddad..940b04772af80 100644 --- a/include/uapi/linux/bfs_fs.h +++ b/include/uapi/linux/bfs_fs.h @@ -76,7 +76,7 @@ struct bfs_super_block { #define BFS_FILEBLOCKS(ip) \ ((ip)->i_sblock == 0 ? 0 : (le32_to_cpu((ip)->i_eblock) + 1) - le32_to_cpu((ip)->i_sblock)) #define BFS_UNCLEAN(bfs_sb, sb) \ - ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) + ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & SB_RDONLY)) #endif /* _LINUX_BFS_FS_H */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index d240256263103..9649ecd8a73a7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -331,7 +331,7 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, void *data) { struct ipc_namespace *ns; - if (flags & MS_KERNMOUNT) { + if (flags & SB_KERNMOUNT) { ns = data; data = NULL; } else { diff --git a/mm/shmem.c b/mm/shmem.c index 4aa9307feab0a..7fbe67be86fa8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3776,7 +3776,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) * tmpfs instance, limiting inodes to one per page of lowmem; * but the internal instance is left unlimited. */ - if (!(sb->s_flags & MS_KERNMOUNT)) { + if (!(sb->s_flags & SB_KERNMOUNT)) { sbinfo->max_blocks = shmem_default_max_blocks(); sbinfo->max_inodes = shmem_default_max_inodes(); if (shmem_parse_options(data, sbinfo, false)) { @@ -3784,12 +3784,12 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) goto failed; } } else { - sb->s_flags |= MS_NOUSER; + sb->s_flags |= SB_NOUSER; } sb->s_export_op = &shmem_export_ops; - sb->s_flags |= MS_NOSEC; + sb->s_flags |= SB_NOSEC; #else - sb->s_flags |= MS_NOUSER; + sb->s_flags |= SB_NOUSER; #endif spin_lock_init(&sbinfo->stat_lock); @@ -3809,7 +3809,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = shmem_xattr_handlers; #endif #ifdef CONFIG_TMPFS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif uuid_gen(&sb->s_uuid); diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 8542e9a55e1b8..d4fa04d914395 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2451,7 +2451,7 @@ static int __init aa_create_aafs(void) aafs_mnt = kern_mount(&aafs_ops); if (IS_ERR(aafs_mnt)) panic("can't set apparmorfs up\n"); - aafs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; + aafs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; /* Populate fs tree. */ error = entry_create_dir(&aa_sfs_entry, NULL); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f546707a2bbbe..6505e1ad9e230 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -86,7 +86,7 @@ static inline unsigned int aa_dfa_null_transition(struct aa_dfa *dfa, static inline bool path_mediated_fs(struct dentry *dentry) { - return !(dentry->d_sb->s_flags & MS_NOUSER); + return !(dentry->d_sb->s_flags & SB_NOUSER); } From 15ca08d3299682dc49bad73251677b2c5017ef08 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:10 -0400 Subject: [PATCH 184/406] nfsd: Fix stateid races between OPEN and CLOSE Open file stateids can linger on the nfs4_file list of stateids even after they have been closed. In order to avoid reusing such a stateid, and confusing the client, we need to recheck the nfs4_stid's type after taking the mutex. Otherwise, we risk reusing an old stateid that was already closed, which will confuse clients that expect new stateids to conform to RFC7530 Sections 9.1.4.2 and 16.2.5 or RFC5661 Sections 8.2.2 and 18.2.4. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 67 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b82817767b9da..ee8fde2dfa92f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3562,7 +3562,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - if (local->st_stateowner == &oo->oo_owner) { + if (local->st_stateowner != &oo->oo_owner) + continue; + if (local->st_stid.sc_type == NFS4_OPEN_STID) { ret = local; refcount_inc(&ret->st_stid.sc_count); break; @@ -3571,6 +3573,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) return ret; } +static __be32 +nfsd4_verify_open_stid(struct nfs4_stid *s) +{ + __be32 ret = nfs_ok; + + switch (s->sc_type) { + default: + break; + case NFS4_CLOSED_STID: + case NFS4_CLOSED_DELEG_STID: + ret = nfserr_bad_stateid; + break; + case NFS4_REVOKED_DELEG_STID: + ret = nfserr_deleg_revoked; + } + return ret; +} + +/* Lock the stateid st_mutex, and deal with races with CLOSE */ +static __be32 +nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) +{ + __be32 ret; + + mutex_lock(&stp->st_mutex); + ret = nfsd4_verify_open_stid(&stp->st_stid); + if (ret != nfs_ok) + mutex_unlock(&stp->st_mutex); + return ret; +} + +static struct nfs4_ol_stateid * +nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *stp; + for (;;) { + spin_lock(&fp->fi_lock); + stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); + if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok) + break; + nfs4_put_stid(&stp->st_stid); + } + return stp; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3615,6 +3663,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) mutex_init(&stp->st_mutex); mutex_lock(&stp->st_mutex); +retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ -3639,7 +3688,11 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { - mutex_lock(&retstp->st_mutex); + /* Handle races with CLOSE */ + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); stp = retstp; @@ -4460,9 +4513,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; - spin_lock(&fp->fi_lock); - stp = nfsd4_find_existing_open(fp, open); - spin_unlock(&fp->fi_lock); + stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4476,7 +4527,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); @@ -5367,7 +5417,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) bool unhashed; LIST_HEAD(reaplist); - s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -5407,10 +5456,12 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; + + stp->st_stid.sc_type = NFS4_CLOSED_STID; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); + mutex_unlock(&stp->st_mutex); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); From d8a1a000555ecd1b824ac1ed6df8fe364dfbbbb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:11 -0400 Subject: [PATCH 185/406] nfsd: Fix another OPEN stateid race If nfsd4_process_open2() is initialising a new stateid, and yet the call to nfs4_get_vfs_file() fails for some reason, then we must declare the stateid closed, and unhash it before dropping the mutex. Right now, we unhash the stateid after dropping the mutex, and without changing the stateid type, meaning that another OPEN could theoretically look it up and attempt to use it. Reported-by: Andrew W Elble Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ee8fde2dfa92f..457f0e7ece740 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4502,6 +4502,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; + bool new_stp = false; /* * Lookup file; if found, lookup stateid and check open request, @@ -4521,11 +4522,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } + if (!stp) { + stp = init_open_stateid(fp, open); + if (!open->op_stp) + new_stp = true; + } + /* * OPEN the file, or upgrade an existing OPEN. * If truncate fails, the OPEN fails. + * + * stp is already locked. */ - if (stp) { + if (!new_stp) { /* Stateid was found, this is an OPEN upgrade */ status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { @@ -4533,22 +4542,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } } else { - /* stp is returned locked. */ - stp = init_open_stateid(fp, open); - /* See if we lost the race to some other thread */ - if (stp->st_access_bmap != 0) { - status = nfs4_upgrade_open(rqstp, fp, current_fh, - stp, open); - if (status) { - mutex_unlock(&stp->st_mutex); - goto out; - } - goto upgrade_out; - } status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - mutex_unlock(&stp->st_mutex); + stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); + mutex_unlock(&stp->st_mutex); goto out; } @@ -4557,7 +4555,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } -upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); From fb500a7cfee7f2f447d2bbf30cb59629feab6ac1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:12 -0400 Subject: [PATCH 186/406] nfsd: CLOSE SHOULD return the invalid special stateid for NFSv4.x (x>0) Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 457f0e7ece740..afc04b9784a04 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -63,6 +63,9 @@ static const stateid_t zero_stateid = { static const stateid_t currentstateid = { .si_generation = 1, }; +static const stateid_t close_stateid = { + .si_generation = 0xffffffffU, +}; static u64 current_sessionid = 1; @@ -5461,6 +5464,11 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); + /* See RFC5661 sectionm 18.2.4 */ + if (stp->st_stid.sc_client->cl_minorversion) + memcpy(&close->cl_stateid, &close_stateid, + sizeof(close->cl_stateid)); + /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: From 659aefb68eca28ba9aa482a9fc64de107332e256 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:13 -0400 Subject: [PATCH 187/406] nfsd: Ensure we don't recognise lock stateids after freeing them In order to deal with lookup races, nfsd4_free_lock_stateid() needs to be able to signal to other stateful functions that the lock stateid is no longer valid. Right now, nfsd_lock() will check whether or not an existing stateid is still hashed, but only in the "new lock" path. To ensure the stateid invalidation is also recognised by the "existing lock" path, and also by a second call to nfsd4_free_lock_stateid() itself, we can change the type to NFS4_CLOSED_STID under the stp->st_mutex. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index afc04b9784a04..6542b57ecc861 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5149,7 +5149,9 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) struct nfs4_ol_stateid *stp = openlockstateid(s); __be32 ret; - mutex_lock(&stp->st_mutex); + ret = nfsd4_lock_ol_stateid(stp); + if (ret) + goto out_put_stid; ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) @@ -5160,11 +5162,13 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) lockowner(stp->st_stateowner))) goto out; + stp->st_stid.sc_type = NFS4_CLOSED_STID; release_lock_stateid(stp); ret = nfs_ok; out: mutex_unlock(&stp->st_mutex); +out_put_stid: nfs4_put_stid(s); return ret; } @@ -5733,6 +5737,8 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) lockdep_assert_held(&clp->cl_lock); list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { + if (lst->st_stid.sc_type != NFS4_LOCK_STID) + continue; if (lst->st_stid.sc_file == fp) { refcount_inc(&lst->st_stid.sc_count); return lst; @@ -5807,7 +5813,6 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_lockowner *lo; struct nfs4_ol_stateid *lst; unsigned int strhashval; - bool hashed; lo = find_lockowner_str(cl, &lock->lk_new_owner); if (!lo) { @@ -5830,15 +5835,7 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, goto out; } - mutex_lock(&lst->st_mutex); - - /* See if it's still hashed to avoid race with FREE_STATEID */ - spin_lock(&cl->cl_lock); - hashed = !list_empty(&lst->st_perfile); - spin_unlock(&cl->cl_lock); - - if (!hashed) { - mutex_unlock(&lst->st_mutex); + if (nfsd4_lock_ol_stateid(lst) != nfs_ok) { nfs4_put_stid(&lst->st_stid); goto retry; } From fd1fd685b30867122aafbe2998ce4065c8c87e8f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:14 -0400 Subject: [PATCH 188/406] nfsd4: move find_lock_stateid Trivial cleanup to simplify following patch. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6542b57ecc861..49a9741fa132e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5703,6 +5703,25 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, return ret; } +static struct nfs4_ol_stateid * +find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *lst; + struct nfs4_client *clp = lo->lo_owner.so_client; + + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { + if (lst->st_stid.sc_type != NFS4_LOCK_STID) + continue; + if (lst->st_stid.sc_file == fp) { + refcount_inc(&lst->st_stid.sc_count); + return lst; + } + } + return NULL; +} + static void init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_file *fp, struct inode *inode, @@ -5728,25 +5747,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, spin_unlock(&fp->fi_lock); } -static struct nfs4_ol_stateid * -find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) -{ - struct nfs4_ol_stateid *lst; - struct nfs4_client *clp = lo->lo_owner.so_client; - - lockdep_assert_held(&clp->cl_lock); - - list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { - if (lst->st_stid.sc_type != NFS4_LOCK_STID) - continue; - if (lst->st_stid.sc_file == fp) { - refcount_inc(&lst->st_stid.sc_count); - return lst; - } - } - return NULL; -} - static struct nfs4_ol_stateid * find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, struct inode *inode, struct nfs4_ol_stateid *ost, From beeca19cf1249a917ed737729dc92337c7633bea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:14 -0400 Subject: [PATCH 189/406] nfsd: Fix race in lock stateid creation If we're looking up a new lock state, and the creation fails, then we want to unhash it, just like we do for OPEN. However in order to do so, we need to that no other LOCK requests can grab the mutex until we have unhashed it (and marked it as closed). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 70 +++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 49a9741fa132e..6d5993caf9bc6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5722,14 +5722,22 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) return NULL; } -static void +static struct nfs4_ol_stateid * init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_file *fp, struct inode *inode, struct nfs4_ol_stateid *open_stp) { struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfs4_ol_stateid *retstp; - lockdep_assert_held(&clp->cl_lock); + mutex_init(&stp->st_mutex); + mutex_lock(&stp->st_mutex); +retry: + spin_lock(&clp->cl_lock); + spin_lock(&fp->fi_lock); + retstp = find_lock_stateid(lo, fp); + if (retstp) + goto out_unlock; refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; @@ -5739,12 +5747,22 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - mutex_init(&stp->st_mutex); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); - spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); +out_unlock: spin_unlock(&fp->fi_lock); + spin_unlock(&clp->cl_lock); + if (retstp) { + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } + /* To keep mutex tracking happy */ + mutex_unlock(&stp->st_mutex); + stp = retstp; + } + return stp; } static struct nfs4_ol_stateid * @@ -5757,26 +5775,25 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *clp = oo->oo_owner.so_client; + *new = false; spin_lock(&clp->cl_lock); lst = find_lock_stateid(lo, fi); - if (lst == NULL) { - spin_unlock(&clp->cl_lock); - ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); - if (ns == NULL) - return NULL; - - spin_lock(&clp->cl_lock); - lst = find_lock_stateid(lo, fi); - if (likely(!lst)) { - lst = openlockstateid(ns); - init_lock_stateid(lst, lo, fi, inode, ost); - ns = NULL; - *new = true; - } - } spin_unlock(&clp->cl_lock); - if (ns) + if (lst != NULL) { + if (nfsd4_lock_ol_stateid(lst) == nfs_ok) + goto out; + nfs4_put_stid(&lst->st_stid); + } + ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); + if (ns == NULL) + return NULL; + + lst = init_lock_stateid(openlockstateid(ns), lo, fi, inode, ost); + if (lst == openlockstateid(ns)) + *new = true; + else nfs4_put_stid(ns); +out: return lst; } @@ -5828,17 +5845,12 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, goto out; } -retry: lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (lst == NULL) { status = nfserr_jukebox; goto out; } - if (nfsd4_lock_ol_stateid(lst) != nfs_ok) { - nfs4_put_stid(&lst->st_stid); - goto retry; - } status = nfs_ok; *plst = lst; out: @@ -6044,14 +6056,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; - mutex_unlock(&lock_stp->st_mutex); - /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. */ - if (status && new) + if (status && new) { + lock_stp->st_stid.sc_type = NFS4_CLOSED_STID; release_lock_stateid(lock_stp); + } + + mutex_unlock(&lock_stp->st_mutex); nfs4_put_stid(&lock_stp->st_stid); } From 9271d7e509c1bfc0b9a418caec29ec8d1ac38270 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:15 -0400 Subject: [PATCH 190/406] nfsd: Ensure we check stateid validity in the seqid operation checks After taking the stateid st_mutex, we want to know that the stateid still represents valid state before performing any non-idempotent actions. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6d5993caf9bc6..da07c24e9372f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5234,15 +5234,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID - || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) - /* - * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step, and - * revoked delegations are kept only for free_stateid. - */ - return nfserr_bad_stateid; - mutex_lock(&stp->st_mutex); + status = nfsd4_lock_ol_stateid(stp); + if (status != nfs_ok) + return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); From 03da3169c67f67729d8b52cee4e5689b0a3f94c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:16 -0400 Subject: [PATCH 191/406] nfsd: Fix races with check_stateid_generation() The various functions that call check_stateid_generation() in order to compare a client-supplied stateid with the nfs4_stid state, usually need to atomically check for closed state. Those that perform the check after locking the st_mutex using nfsd4_lock_ol_stateid() should now be OK, but we do want to fix up the others. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index da07c24e9372f..ecb803d7c9660 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4906,6 +4906,18 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s return nfserr_old_stateid; } +static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_stid *s, bool has_session) +{ + __be32 ret; + + spin_lock(&s->sc_lock); + ret = nfsd4_verify_open_stid(s); + if (ret == nfs_ok) + ret = check_stateid_generation(in, &s->sc_stateid, has_session); + spin_unlock(&s->sc_lock); + return ret; +} + static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols) { if (ols->st_stateowner->so_is_open_owner && @@ -4934,7 +4946,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) s = find_stateid_locked(cl, stateid); if (!s) goto out_unlock; - status = check_stateid_generation(stateid, &s->sc_stateid, 1); + status = nfsd4_stid_check_stateid_generation(stateid, s, 1); if (status) goto out_unlock; switch (s->sc_type) { @@ -5095,7 +5107,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, &s, nn); if (status) return status; - status = check_stateid_generation(stateid, &s->sc_stateid, + status = nfsd4_stid_check_stateid_generation(stateid, s, nfsd4_has_session(cstate)); if (status) goto out; @@ -5188,6 +5200,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, s = find_stateid_locked(cl, stateid); if (!s) goto out_unlock; + spin_lock(&s->sc_lock); switch (s->sc_type) { case NFS4_DELEG_STID: ret = nfserr_locks_held; @@ -5199,11 +5212,13 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ret = nfserr_locks_held; break; case NFS4_LOCK_STID: + spin_unlock(&s->sc_lock); refcount_inc(&s->sc_count); spin_unlock(&cl->cl_lock); ret = nfsd4_free_lock_stateid(stateid, s); goto out; case NFS4_REVOKED_DELEG_STID: + spin_unlock(&s->sc_lock); dp = delegstateid(s); list_del_init(&dp->dl_recall_lru); spin_unlock(&cl->cl_lock); @@ -5212,6 +5227,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; /* Default falls through and returns nfserr_bad_stateid */ } + spin_unlock(&s->sc_lock); out_unlock: spin_unlock(&cl->cl_lock); out: @@ -5491,7 +5507,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; dp = delegstateid(s); - status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); + status = nfsd4_stid_check_stateid_generation(stateid, &dp->dl_stid, nfsd4_has_session(cstate)); if (status) goto put_stateid; From ba589528d6a251721a245fad07c0e3090fc9d6f5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 8 Nov 2017 08:55:22 +0300 Subject: [PATCH 192/406] nfsd: remove net pointer from debug messages Publishing of net pointer is not safe, replace it in debug meesages by net->ns.inum [ 119.989161] nfsd: initializing export module (net: f00001e7). [ 171.767188] NFSD: starting 90-second grace period (net f00001e7) [ 322.185240] nfsd: shutting down export module (net: f00001e7). [ 322.186062] nfsd: export shutdown complete (net: f00001e7). Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 46b48dbbdd325..f9e7dd57cc12c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1230,7 +1230,7 @@ nfsd_export_init(struct net *net) int rv; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("nfsd: initializing export module (net: %p).\n", net); + dprintk("nfsd: initializing export module (net: %x).\n", net->ns.inum); nn->svc_export_cache = cache_create_net(&svc_export_cache_template, net); if (IS_ERR(nn->svc_export_cache)) @@ -1278,7 +1278,7 @@ nfsd_export_shutdown(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("nfsd: shutting down export module (net: %p).\n", net); + dprintk("nfsd: shutting down export module (net: %x).\n", net->ns.inum); cache_unregister_net(nn->svc_expkey_cache, net); cache_unregister_net(nn->svc_export_cache, net); @@ -1286,5 +1286,5 @@ nfsd_export_shutdown(struct net *net) cache_destroy_net(nn->svc_export_cache, net); svcauth_unix_purge(net); - dprintk("nfsd: export shutdown complete (net: %p).\n", net); + dprintk("nfsd: export shutdown complete (net: %x).\n", net->ns.inum); } From e919b0765287f36f7e9c84aed785eddfbbd98dfa Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 8 Nov 2017 08:55:55 +0300 Subject: [PATCH 193/406] lockd: remove net pointer from messages Publishing of net pointer is not safe, use net->ns.inum as net ID in debug messages [ 171.757678] lockd_up_net: per-net data created; net=f00001e7 [ 171.767188] NFSD: starting 90-second grace period (net f00001e7) [ 300.653313] lockd: nuking all hosts in net f00001e7... [ 300.653641] lockd: host garbage collection for net f00001e7 [ 300.653968] lockd: nlmsvc_mark_resources for net f00001e7 [ 300.711483] lockd_down_net: per-net data destroyed; net=f00001e7 [ 300.711847] lockd: nuking all hosts in net 0... [ 300.711847] lockd: host garbage collection for net 0 [ 300.711848] lockd: nlmsvc_mark_resources for net 0 Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 21 +++++++++++++-------- fs/lockd/mon.c | 3 ++- fs/lockd/svc.c | 9 +++++---- fs/lockd/svcsubs.c | 2 +- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0d4e590e05498..f90f6d5082ef5 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -578,8 +578,10 @@ static void nlm_complain_hosts(struct net *net) if (ln->nrhosts == 0) return; - printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net); - dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net); + pr_warn("lockd: couldn't shutdown host module for net %x!\n", + net->ns.inum); + dprintk("lockd: %lu hosts left in net %x:\n", ln->nrhosts, + net->ns.inum); } else { if (nrhosts == 0) return; @@ -590,9 +592,9 @@ static void nlm_complain_hosts(struct net *net) for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; - dprintk(" %s (cnt %d use %d exp %ld net %p)\n", + dprintk(" %s (cnt %d use %d exp %ld net %x)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires, host->net); + host->h_inuse, host->h_expires, host->net->ns.inum); } } @@ -605,7 +607,8 @@ nlm_shutdown_hosts_net(struct net *net) mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ - dprintk("lockd: nuking all hosts in net %p...\n", net); + dprintk("lockd: nuking all hosts in net %x...\n", + net ? net->ns.inum : 0); for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; @@ -646,7 +649,8 @@ nlm_gc_hosts(struct net *net) struct hlist_node *next; struct nlm_host *host; - dprintk("lockd: host garbage collection for net %p\n", net); + dprintk("lockd: host garbage collection for net %x\n", + net ? net->ns.inum : 0); for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; @@ -662,9 +666,10 @@ nlm_gc_hosts(struct net *net) if (atomic_read(&host->h_count) || host->h_inuse || time_before(jiffies, host->h_expires)) { dprintk("nlm_gc_hosts skipping %s " - "(cnt %d use %d exp %ld net %p)\n", + "(cnt %d use %d exp %ld net %x)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires, host->net); + host->h_inuse, host->h_expires, + host->net->ns.inum); continue; } nlm_destroy_host_locked(host); diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 9fbbd11f9ecbb..96cfb2967ac75 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -110,7 +110,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, clnt = nsm_create(host->net, host->nodename); if (IS_ERR(clnt)) { dprintk("lockd: failed to create NSM upcall transport, " - "status=%ld, net=%p\n", PTR_ERR(clnt), host->net); + "status=%ld, net=%x\n", PTR_ERR(clnt), + host->net->ns.inum); return PTR_ERR(clnt); } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a8e3777c94dc6..e28796bd7375f 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -259,7 +259,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) if (error < 0) goto err_bind; set_grace_period(net); - dprintk("lockd_up_net: per-net data created; net=%p\n", net); + dprintk("%s: per-net data created; net=%x\n", __func__, net->ns.inum); return 0; err_bind: @@ -275,11 +275,12 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); svc_shutdown_net(serv, net); - dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); + dprintk("%s: per-net data destroyed; net=%x\n", + __func__, net->ns.inum); } } else { - printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", - nlmsvc_task, net); + pr_err("%s: no users! task=%p, net=%x\n", + __func__, nlmsvc_task, net->ns.inum); BUG(); } } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index a563ddbc19e69..4ec3d6e03e76d 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -370,7 +370,7 @@ nlmsvc_mark_resources(struct net *net) { struct nlm_host hint; - dprintk("lockd: nlmsvc_mark_resources for net %p\n", net); + dprintk("lockd: %s for net %x\n", __func__, net ? net->ns.inum : 0); hint.net = net; nlm_traverse_files(&hint, nlmsvc_mark_host, NULL); } From 4f34bd0540d290d1fe1bc699550025623b2761ef Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Wed, 8 Nov 2017 17:29:51 -0500 Subject: [PATCH 194/406] nfsd: fix locking validator warning on nfs4_ol_stateid->st_mutex class The use of the st_mutex has been confusing the validator. Use the proper nested notation so as to not produce warnings. Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ecb803d7c9660..98342d262771e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -86,6 +86,11 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid); */ static DEFINE_SPINLOCK(state_lock); +enum nfsd4_st_mutex_lock_subclass { + OPEN_STATEID_MUTEX = 0, + LOCK_STATEID_MUTEX = 1, +}; + /* * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for * the refcount on the open stateid to drop. @@ -3600,7 +3605,7 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) { __be32 ret; - mutex_lock(&stp->st_mutex); + mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); ret = nfsd4_verify_open_stid(&stp->st_stid); if (ret != nfs_ok) mutex_unlock(&stp->st_mutex); @@ -3664,7 +3669,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) stp = open->op_stp; /* We are moving these outside of the spinlocks to avoid the warnings */ mutex_init(&stp->st_mutex); - mutex_lock(&stp->st_mutex); + mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&oo->oo_owner.so_client->cl_lock); @@ -5741,7 +5746,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_ol_stateid *retstp; mutex_init(&stp->st_mutex); - mutex_lock(&stp->st_mutex); + mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&clp->cl_lock); spin_lock(&fp->fi_lock); From b872285751c1af010e12d02bce7069e2061a58ca Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 6 Nov 2017 16:22:48 +0300 Subject: [PATCH 195/406] grace: replace BUG_ON by WARN_ONCE in exit_net hook Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/nfs_common/grace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 897b299db55e0..bd3e2d328e64d 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -104,7 +104,9 @@ grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); - BUG_ON(!list_empty(grace_list)); + WARN_ONCE(!list_empty(grace_list), + "net %x %s: grace_list is not empty\n", + net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { From a3152f1440c762128a626d90718ba6535c81c1b4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 6 Nov 2017 16:23:24 +0300 Subject: [PATCH 196/406] lockd: added cleanup checks in exit_net hook Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index e28796bd7375f..cb659ef789c08 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -677,6 +677,17 @@ static int lockd_init_net(struct net *net) static void lockd_exit_net(struct net *net) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + WARN_ONCE(!list_empty(&ln->lockd_manager.list), + "net %x %s: lockd_manager.list is not empty\n", + net->ns.inum, __func__); + WARN_ONCE(!list_empty(&ln->nsm_handles), + "net %x %s: nsm_handles list is not empty\n", + net->ns.inum, __func__); + WARN_ONCE(delayed_work_pending(&ln->grace_period_end), + "net %x %s: grace_period_end was not cancelled\n", + net->ns.inum, __func__); } static struct pernet_operations lockd_net_ops = { From 3a2b19d1ee5633f76ae8a88da7bc039a5d1732aa Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Nov 2017 13:03:42 +0300 Subject: [PATCH 197/406] lockd: lost rollback of set_grace_period() in lockd_down_net() Commit efda760fe95ea ("lockd: fix lockd shutdown race") is incorrect, it removes lockd_manager and disarm grace_period_end for init_net only. If nfsd was started from another net namespace lockd_up_net() calls set_grace_period() that adds lockd_manager into per-netns list and queues grace_period_end delayed work. These action should be reverted in lockd_down_net(). Otherwise it can lead to double list_add on after restart nfsd in netns, and to use-after-free if non-disarmed delayed work will be executed after netns destroy. Fixes: efda760fe95e ("lockd: fix lockd shutdown race") Cc: stable@vger.kernel.org Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index cb659ef789c08..4acf0ebd9802a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -274,6 +274,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); + cancel_delayed_work_sync(&ln->grace_period_end); + locks_end_grace(&ln->lockd_manager); svc_shutdown_net(serv, net); dprintk("%s: per-net data destroyed; net=%x\n", __func__, net->ns.inum); From 64ebe12494fd5d193f014ce38e1fd83cc57883c8 Mon Sep 17 00:00:00 2001 From: Naofumi Honda Date: Thu, 9 Nov 2017 10:57:16 -0500 Subject: [PATCH 198/406] nfsd: fix panic in posix_unblock_lock called from nfs4_laundromat From kernel 4.9, my two nfsv4 servers sometimes suffer from "panic: unable to handle kernel page request" in posix_unblock_lock() called from nfs4_laundromat(). These panics diseappear if we revert the commit "nfsd: add a LRU list for blocked locks". The cause appears to be a typo in nfs4_laundromat(), which is also present in nfs4_state_shutdown_net(). Cc: stable@vger.kernel.org Fixes: 7919d0a27f1e "nfsd: add a LRU list for blocked locks" Cc: jlayton@redhat.com Reveiwed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 98342d262771e..d0992d59f4e14 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4790,7 +4790,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); @@ -7236,7 +7236,7 @@ nfs4_state_shutdown_net(struct net *net) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); From ae254dac721d44c0bfebe2795df87459e2e88219 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Thu, 9 Nov 2017 13:41:10 -0500 Subject: [PATCH 199/406] nfsd: check for use of the closed special stateid Prevent the use of the closed (invalid) special stateid by clients. Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d0992d59f4e14..73451436c1c20 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -72,6 +72,7 @@ static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) +#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t))) /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); @@ -4936,7 +4937,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return status; /* Client debugging aid. */ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { @@ -4994,7 +4996,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, else if (typemask & NFS4_DELEG_STID) typemask |= NFS4_REVOKED_DELEG_STID; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { From d34971a65b72619508f49cd237283e92f1c329d5 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 17 Oct 2017 18:14:23 +0200 Subject: [PATCH 200/406] sunrpc: make the function arg as const Make the struct cache_detail *tmpl argument of the function cache_create_net as const as it is only getting passed to kmemup having the argument as const void *. Add const to the prototype too. Signed-off-by: Bhumika Goyal Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 +- net/sunrpc/cache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 270bad0e1bed1..40d2822f0e2f1 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -213,7 +213,7 @@ extern void __init cache_initialize(void); extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); -extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); +extern struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net); extern void cache_destroy_net(struct cache_detail *cd, struct net *net); extern void sunrpc_init_cache_detail(struct cache_detail *cd); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 79d55d949d9a7..e68943895be48 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_unregister_net); -struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) +struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; int i; From ae2e408ec2e861b0e7dceea1808e4305a9381c2f Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 17 Oct 2017 18:14:25 +0200 Subject: [PATCH 201/406] NFSD: make cache_detail structures const Make these const as they are only getting passed to the function cache_create_net having the argument as const. Signed-off-by: Bhumika Goyal Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 4 ++-- fs/nfsd/nfs4idmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index f9e7dd57cc12c..8ceb25a10ea0d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -232,7 +232,7 @@ static struct cache_head *expkey_alloc(void) return NULL; } -static struct cache_detail svc_expkey_cache_template = { +static const struct cache_detail svc_expkey_cache_template = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", @@ -748,7 +748,7 @@ static struct cache_head *svc_export_alloc(void) return NULL; } -static struct cache_detail svc_export_cache_template = { +static const struct cache_detail svc_export_cache_template = { .owner = THIS_MODULE, .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 6b9b6cca469f4..a5bb76593ce72 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -178,7 +178,7 @@ static struct ent *idtoname_lookup(struct cache_detail *, struct ent *); static struct ent *idtoname_update(struct cache_detail *, struct ent *, struct ent *); -static struct cache_detail idtoname_cache_template = { +static const struct cache_detail idtoname_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", @@ -341,7 +341,7 @@ static struct ent *nametoid_update(struct cache_detail *, struct ent *, struct ent *); static int nametoid_parse(struct cache_detail *, char *, int); -static struct cache_detail nametoid_cache_template = { +static const struct cache_detail nametoid_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", From ee24eac3ebb781c12a654985e33ecaa07f4d0f95 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 17 Oct 2017 18:14:26 +0200 Subject: [PATCH 202/406] SUNRPC: make cache_detail structures const Make these const as they are only getting passed to the function cache_create_net having the argument as const. Signed-off-by: Bhumika Goyal Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/svcauth_unix.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73165e9ca5bfd..5dd4e6c9fef21 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -264,7 +264,7 @@ static int rsi_parse(struct cache_detail *cd, return status; } -static struct cache_detail rsi_cache_template = { +static const struct cache_detail rsi_cache_template = { .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", @@ -524,7 +524,7 @@ static int rsc_parse(struct cache_detail *cd, return status; } -static struct cache_detail rsc_cache_template = { +static const struct cache_detail rsc_cache_template = { .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, .name = "auth.rpcsec.context", diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index f81eaa8e08888..740b67d5a733b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -569,7 +569,7 @@ static int unix_gid_show(struct seq_file *m, return 0; } -static struct cache_detail unix_gid_cache_template = { +static const struct cache_detail unix_gid_cache_template = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, .name = "auth.unix.gid", @@ -862,7 +862,7 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; -static struct cache_detail ip_map_cache_template = { +static const struct cache_detail ip_map_cache_template = { .owner = THIS_MODULE, .hash_size = IP_HASHMAX, .name = "auth.unix.ip", From 6b18dd1c03e07262ea0866084856b2a3c5ba8d09 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Nov 2017 10:19:26 +0300 Subject: [PATCH 203/406] race of lockd inetaddr notifiers vs nlmsvc_rqst change lockd_inet[6]addr_event use nlmsvc_rqst without taken nlmsvc_mutex, nlmsvc_rqst can be changed during execution of notifiers and crash the host. Patch enables access to nlmsvc_rqst only when it was correctly initialized and delays its cleanup until notifiers are no longer in use. Note that nlmsvc_rqst can be temporally set to ERR_PTR, so the "if (nlmsvc_rqst)" check in notifiers is insufficient on its own. Signed-off-by: Vasily Averin Tested-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 4acf0ebd9802a..9c36d614bf896 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,6 +57,9 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; +atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); + unsigned int lockd_net_id; /* @@ -293,7 +296,8 @@ static int lockd_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; if (nlmsvc_rqst) { @@ -304,6 +308,8 @@ static int lockd_inetaddr_event(struct notifier_block *this, svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -320,7 +326,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; if (nlmsvc_rqst) { @@ -332,6 +339,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin6); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -348,10 +357,12 @@ static void lockd_unregister_notifiers(void) #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif + wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0); } static void lockd_svc_exit_thread(void) { + atomic_dec(&nlm_ntf_refcnt); lockd_unregister_notifiers(); svc_exit_thread(nlmsvc_rqst); } @@ -376,6 +387,7 @@ static int lockd_start_svc(struct svc_serv *serv) goto out_rqst; } + atomic_inc(&nlm_ntf_refcnt); svc_sock_update_bufs(serv); serv->sv_maxconn = nlm_max_connections; From 2317dc557a3b6d5b73b697034611d658eb2cbde9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Nov 2017 10:19:35 +0300 Subject: [PATCH 204/406] race of nfsd inetaddr notifiers vs nn->nfsd_serv change nfsd_inet[6]addr_event uses nn->nfsd_serv without taking nfsd_mutex, which can be changed during execution of notifiers and crash the host. Moreover if notifiers were enabled in one net namespace they are enabled in all other net namespaces, from creation until destruction. This patch allows notifiers to access nn->nfsd_serv only after the pointer is correctly initialized and delays cleanup until notifiers are no longer in use. Signed-off-by: Vasily Averin Tested-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 3 +++ fs/nfsd/nfsctl.c | 3 +++ fs/nfsd/nfssvc.c | 14 +++++++++++--- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1c91391f48055..36358d435cb04 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -119,6 +119,9 @@ struct nfsd_net { u32 clverifier_counter; struct svc_serv *nfsd_serv; + + wait_queue_head_t ntf_wq; + atomic_t ntf_refcnt; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6493df6b1bd5f..d107b4426f7eb 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1241,6 +1241,9 @@ static __net_init int nfsd_init_net(struct net *net) nn->nfsd4_grace = 90; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); + + atomic_set(&nn->ntf_refcnt, 0); + init_waitqueue_head(&nn->ntf_wq); return 0; out_idmap_error: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 33117d4ffce07..89cb484f1cfbe 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -335,7 +335,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nn->ntf_refcnt)) goto out; if (nn->nfsd_serv) { @@ -344,6 +345,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } + atomic_dec(&nn->ntf_refcnt); + wake_up(&nn->ntf_wq); out: return NOTIFY_DONE; @@ -363,7 +366,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nn->ntf_refcnt)) goto out; if (nn->nfsd_serv) { @@ -374,7 +378,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } - + atomic_dec(&nn->ntf_refcnt); + wake_up(&nn->ntf_wq); out: return NOTIFY_DONE; } @@ -391,6 +396,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + atomic_dec(&nn->ntf_refcnt); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -398,6 +404,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } + wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0); /* * write_ports can create the server without actually starting @@ -517,6 +524,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } + atomic_inc(&nn->ntf_refcnt); ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */ return 0; } From 9e137ed5abcb21c0efffb1b6a7c48f2d21980303 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 30 Oct 2017 16:47:58 +0300 Subject: [PATCH 205/406] nlm_shutdown_hosts_net() cleanup nlm_complain_hosts() walks through nlm_server_hosts hlist, which should be protected by nlm_host_mutex. Signed-off-by: Vasily Averin Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f90f6d5082ef5..826a89184f90f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -621,9 +621,8 @@ nlm_shutdown_hosts_net(struct net *net) /* Then, perform a garbage collection pass */ nlm_gc_hosts(net); - mutex_unlock(&nlm_host_mutex); - nlm_complain_hosts(net); + mutex_unlock(&nlm_host_mutex); } /* From 81833de1a46edce9ca20cfe079872ac1c20ef359 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 13 Nov 2017 07:25:40 +0300 Subject: [PATCH 206/406] lockd: fix "list_add double add" caused by legacy signal interface restart_grace() uses hardcoded init_net. It can cause to "list_add double add" in following scenario: 1) nfsd and lockd was started in several net namespaces 2) nfsd in init_net was stopped (lockd was not stopped because it have users from another net namespaces) 3) lockd got signal, called restart_grace() -> set_grace_period() and enabled lock_manager in hardcoded init_net. 4) nfsd in init_net is started again, its lockd_up() calls set_grace_period() and tries to add lock_manager into init_net 2nd time. Jeff Layton suggest: "Make it safe to call locks_start_grace multiple times on the same lock_manager. If it's already on the global grace_list, then don't try to add it again. (But we don't intentionally add twice, so for now we WARN about that case.) With this change, we also need to ensure that the nfsd4 lock manager initializes the list before we call locks_start_grace. While we're at it, move the rest of the nfsd_net initialization into nfs4_state_create_net. I see no reason to have it spread over two functions like it is today." Suggested patch was updated to generate warning in described situation. Suggested-by: Jeff Layton Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/nfs_common/grace.c | 6 +++++- fs/nfsd/nfs4state.c | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index bd3e2d328e64d..5be08f02a76bc 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm) struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); - list_add(&lm->list, grace_list); + if (list_empty(&lm->list)) + list_add(&lm->list, grace_list); + else + WARN(1, "double list_add attempt detected in net %x %s\n", + net->ns.inum, (net == &init_net) ? "(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 73451436c1c20..b29b5a185a2cb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7103,6 +7103,10 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; + nn->boot_time = get_seconds(); + nn->grace_ended = false; + nn->nfsd4_manager.block_opens = true; + INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); @@ -7160,9 +7164,6 @@ nfs4_state_start_net(struct net *net) ret = nfs4_state_create_net(net); if (ret) return ret; - nn->boot_time = get_seconds(); - nn->grace_ended = false; - nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", From c30bf8cecebabb119aa1fbb17de84f924a8ab8f7 Mon Sep 17 00:00:00 2001 From: Gustavo A R Silva Date: Wed, 18 Oct 2017 15:34:25 -0500 Subject: [PATCH 207/406] i40e/virtchnl: fix application of sizeof to pointer sizeof when applied to a pointer typed expression gives the size of the pointer. The proper fix in this particular case is to code sizeof(*vfres) instead of sizeof(vfres). This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A R Silva Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index a3dc9b9329465..36cb8e068e856 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2086,7 +2086,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) } return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, - (u8 *)vfres, sizeof(vfres)); + (u8 *)vfres, sizeof(*vfres)); } /** From c0f4b163a03e73055dd734eaca64b9580e72e7fb Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 6 Nov 2017 08:31:59 +0200 Subject: [PATCH 208/406] e1000e: fix the use of magic numbers for buffer overrun issue This is a follow on to commit b10effb92e27 ("fix buffer overrun while the I219 is processing DMA transactions") to address David Laights concerns about the use of "magic" numbers. So define masks as well as add additional code comments to give a better understanding of what needs to be done to avoid a buffer overrun. Signed-off-by: Sasha Neftin Reviewed-by: Alexander H Duyck Reviewed-by: Dima Ruinskiy Reviewed-by: Raanan Avargil Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 ++- drivers/net/ethernet/intel/e1000e/netdev.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 67163ca898ba2..00a36df02a3fd 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -113,7 +113,8 @@ #define NVM_SIZE_MULTIPLIER 4096 /*multiplier for NVMS field */ #define E1000_FLASH_BASE_ADDR 0xE000 /*offset of NVM access regs */ #define E1000_CTRL_EXT_NVMVS 0x3 /*NVM valid sector */ -#define E1000_TARC0_CB_MULTIQ_3_REQ (1 << 28 | 1 << 29) +#define E1000_TARC0_CB_MULTIQ_3_REQ 0x30000000 +#define E1000_TARC0_CB_MULTIQ_2_REQ 0x20000000 #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index f2f49239b0150..9f18d39bdc8f7 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3034,9 +3034,12 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) ew32(IOSFPC, reg_val); reg_val = er32(TARC(0)); - /* SPT and KBL Si errata workaround to avoid Tx hang */ - reg_val &= ~BIT(28); - reg_val |= BIT(29); + /* SPT and KBL Si errata workaround to avoid Tx hang. + * Dropping the number of outstanding requests from + * 3 to 2 in order to avoid a buffer overrun. + */ + reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ; + reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ; ew32(TARC(0), reg_val); } } From 80752a98a0a0f666b263fecea327d4018b3f36f9 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 17 Nov 2017 15:35:57 -0800 Subject: [PATCH 209/406] i40e: Fix reporting incorrect error codes Adding cloud filters could fail for a number of reasons, unsupported filter fields for example, which fails during validation of fields itself. This will not result in admin command errors and converting the admin queue status to posix error code using i40e_aq_rc_to_posix would result in incorrect error values. If the failure was due to AQ error itself, reporting that correctly is handled in the inner function. Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4c08cc86463e3..321d8be80871c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7401,7 +7401,6 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %s\n", i40e_stat_str(&pf->hw, err)); - err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status); goto err; } From 8abd20b458cafd4c4e632fe7fb8f1a9861b02ee0 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Sat, 18 Nov 2017 21:53:58 +0100 Subject: [PATCH 210/406] e1000: Fix off-by-one in debug message Signed-off-by: Ahmad Fatoum Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_hw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 8172cf08cc330..3bac9df1c0994 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -4307,8 +4307,10 @@ static void e1000_init_rx_addrs(struct e1000_hw *hw) rar_num = E1000_RAR_ENTRIES; - /* Zero out the other 15 receive addresses. */ - e_dbg("Clearing RAR[1-15]\n"); + /* Zero out the following 14 receive addresses. RAR[15] is for + * manageability + */ + e_dbg("Clearing RAR[1-14]\n"); for (i = 1; i < rar_num; i++) { E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); E1000_WRITE_FLUSH(); From b12cbb21586277f72533769832c24cc6c1d60ab3 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 22 Nov 2017 07:33:38 -0800 Subject: [PATCH 211/406] apparmor: fix oops in audit_signal_cb hook The apparmor_audit_data struct ordering got messed up during a merge conflict, resulting in the signal integer and peer pointer being in a union instead of a struct. For most of the 4.13 and 4.14 life cycle, this was hidden by commit 651e28c5537a ("apparmor: add base infastructure for socket mediation") which fixed the apparmor_audit_data struct when its data was added. When that commit was reverted in -rc7 the signal audit bug was exposed, and unfortunately it never showed up in any of the testing until after 4.14 was released. Shaun Khan, Zephaniah E. Loss-Cutler-Hull filed nearly simultaneous bug reports (with different oopes, the smaller of which is included below). Full credit goes to Tetsuo Handa for jumping on this as well and noticing the audit data struct problem and reporting it. [ 76.178568] BUG: unable to handle kernel paging request at ffffffff0eee3bc0 [ 76.178579] IP: audit_signal_cb+0x6c/0xe0 [ 76.178581] PGD 1a640a067 P4D 1a640a067 PUD 0 [ 76.178586] Oops: 0000 [#1] PREEMPT SMP [ 76.178589] Modules linked in: fuse rfcomm bnep usblp uvcvideo btusb btrtl btbcm btintel bluetooth ecdh_generic ip6table_filter ip6_tables xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables intel_rapl joydev wmi_bmof serio_raw iwldvm iwlwifi shpchp kvm_intel kvm irqbypass autofs4 algif_skcipher nls_iso8859_1 nls_cp437 crc32_pclmul ghash_clmulni_intel [ 76.178620] CPU: 0 PID: 10675 Comm: pidgin Not tainted 4.14.0-f1-dirty #135 [ 76.178623] Hardware name: Hewlett-Packard HP EliteBook Folio 9470m/18DF, BIOS 68IBD Ver. F.62 10/22/2015 [ 76.178625] task: ffff9c7a94c31dc0 task.stack: ffffa09b02a4c000 [ 76.178628] RIP: 0010:audit_signal_cb+0x6c/0xe0 [ 76.178631] RSP: 0018:ffffa09b02a4fc08 EFLAGS: 00010292 [ 76.178634] RAX: ffffa09b02a4fd60 RBX: ffff9c7aee0741f8 RCX: 0000000000000000 [ 76.178636] RDX: ffffffffee012290 RSI: 0000000000000006 RDI: ffff9c7a9493d800 [ 76.178638] RBP: ffffa09b02a4fd40 R08: 000000000000004d R09: ffffa09b02a4fc46 [ 76.178641] R10: ffffa09b02a4fcb8 R11: ffff9c7ab44f5072 R12: ffffa09b02a4fd40 [ 76.178643] R13: ffffffff9e447be0 R14: ffff9c7a94c31dc0 R15: 0000000000000001 [ 76.178646] FS: 00007f8b11ba2a80(0000) GS:ffff9c7afea00000(0000) knlGS:0000000000000000 [ 76.178648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 76.178650] CR2: ffffffff0eee3bc0 CR3: 00000003d5209002 CR4: 00000000001606f0 [ 76.178652] Call Trace: [ 76.178660] common_lsm_audit+0x1da/0x780 [ 76.178665] ? d_absolute_path+0x60/0x90 [ 76.178669] ? aa_check_perms+0xcd/0xe0 [ 76.178672] aa_check_perms+0xcd/0xe0 [ 76.178675] profile_signal_perm.part.0+0x90/0xa0 [ 76.178679] aa_may_signal+0x16e/0x1b0 [ 76.178686] apparmor_task_kill+0x51/0x120 [ 76.178690] security_task_kill+0x44/0x60 [ 76.178695] group_send_sig_info+0x25/0x60 [ 76.178699] kill_pid_info+0x36/0x60 [ 76.178703] SYSC_kill+0xdb/0x180 [ 76.178707] ? preempt_count_sub+0x92/0xd0 [ 76.178712] ? _raw_write_unlock_irq+0x13/0x30 [ 76.178716] ? task_work_run+0x6a/0x90 [ 76.178720] ? exit_to_usermode_loop+0x80/0xa0 [ 76.178723] entry_SYSCALL_64_fastpath+0x13/0x94 [ 76.178727] RIP: 0033:0x7f8b0e58b767 [ 76.178729] RSP: 002b:00007fff19efd4d8 EFLAGS: 00000206 ORIG_RAX: 000000000000003e [ 76.178732] RAX: ffffffffffffffda RBX: 0000557f3e3c2050 RCX: 00007f8b0e58b767 [ 76.178735] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000000263b [ 76.178737] RBP: 0000000000000000 R08: 0000557f3e3c2270 R09: 0000000000000001 [ 76.178739] R10: 000000000000022d R11: 0000000000000206 R12: 0000000000000000 [ 76.178741] R13: 0000000000000001 R14: 0000557f3e3c13c0 R15: 0000000000000000 [ 76.178745] Code: 48 8b 55 18 48 89 df 41 b8 20 00 08 01 5b 5d 48 8b 42 10 48 8b 52 30 48 63 48 4c 48 8b 44 c8 48 31 c9 48 8b 70 38 e9 f4 fd 00 00 <48> 8b 14 d5 40 27 e5 9e 48 c7 c6 7d 07 19 9f 48 89 df e8 fd 35 [ 76.178794] RIP: audit_signal_cb+0x6c/0xe0 RSP: ffffa09b02a4fc08 [ 76.178796] CR2: ffffffff0eee3bc0 [ 76.178799] ---[ end trace 514af9529297f1a3 ]--- Fixes: cd1dbf76b23d ("apparmor: add the ability to mediate signals") Reported-by: Zephaniah E. Loss-Cutler-Hull Reported-by: Shuah Khan Suggested-by: Tetsuo Handa Tested-by: Ivan Kozik Tested-by: Zephaniah E. Loss-Cutler-Hull Tested-by: Christian Boltz Tested-by: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: John Johansen --- security/apparmor/include/audit.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 620e811696592..4ac0951187170 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -121,17 +121,19 @@ struct apparmor_audit_data { /* these entries require a custom callback fn */ struct { struct aa_label *peer; - struct { - const char *target; - kuid_t ouid; - } fs; + union { + struct { + const char *target; + kuid_t ouid; + } fs; + int signal; + }; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; - int signal; struct { int rlim; unsigned long max; From 8f5abe842e84ba9e72485ddd9dc02a3562b54e2a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 27 Nov 2017 16:45:56 -0800 Subject: [PATCH 212/406] proc: don't report kernel addresses in /proc//stack This just changes the file to report them as zero, although maybe even that could be removed. I checked, and at least procps doesn't actually seem to parse the 'stack' file at all. And since the file doesn't necessarily even exist (it requires CONFIG_STACKTRACE), possibly other tools don't really use it either. That said, in case somebody parses it with tools, just having that zero there should keep such tools happy. Signed-off-by: Linus Torvalds --- fs/proc/base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 31934cb9dfc88..28fa85276eec6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -443,8 +443,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, save_stack_trace_tsk(task, &trace); for (i = 0; i < trace.nr_entries; i++) { - seq_printf(m, "[<%pK>] %pB\n", - (void *)entries[i], (void *)entries[i]); + seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); } unlock_trace(task); } From bd467e4eababe4c04272c1e646f066db02734c79 Mon Sep 17 00:00:00 2001 From: Robert Lippert Date: Mon, 27 Nov 2017 15:51:55 -0800 Subject: [PATCH 213/406] hwmon: (pmbus) Use 64bit math for DIRECT format values Power values in the 100s of watt range can easily blow past 32bit math limits when processing everything in microwatts. Use 64bit math instead to avoid these issues on common 32bit ARM BMC platforms. Fixes: 442aba78728e ("hwmon: PMBus device driver") Signed-off-by: Robert Lippert Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 52a58b8b6e1bd..a139940cd991a 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -499,8 +500,8 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, static long pmbus_reg2data_direct(struct pmbus_data *data, struct pmbus_sensor *sensor) { - long val = (s16) sensor->data; - long m, b, R; + s64 b, val = (s16)sensor->data; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -528,11 +529,12 @@ static long pmbus_reg2data_direct(struct pmbus_data *data, R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val = div_s64(val + 5LL, 10L); /* round closest */ R++; } - return (val - b) / m; + val = div_s64(val - b, m); + return clamp_val(val, LONG_MIN, LONG_MAX); } /* @@ -656,7 +658,8 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, static u16 pmbus_data2reg_direct(struct pmbus_data *data, struct pmbus_sensor *sensor, long val) { - long m, b, R; + s64 b, val64 = val; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -673,18 +676,18 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } - val = val * m + b; + val64 = val64 * m + b; while (R > 0) { - val *= 10; + val64 *= 10; R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val64 = div_s64(val64 + 5LL, 10L); /* round closest */ R++; } - return val; + return (u16)clamp_val(val64, S16_MIN, S16_MAX); } static u16 pmbus_data2reg_vid(struct pmbus_data *data, From ae5c631e605a452a5a0e73205a92810c01ed954b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Nov 2017 21:41:56 +0200 Subject: [PATCH 214/406] drm/i915: Don't try indexed reads to alternate slave addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can only specify the one slave address to indexed reads/writes. Make sure the messages we check are destined to the same slave address before deciding to do an indexed transfer. Cc: stable@vger.kernel.org Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit c4deb62d7821672265b87952bcd1c808f3bf3e8f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index eb5827110d8ff..8affd47b98b81 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -438,6 +438,7 @@ static bool gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && + msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && (msgs[i + 1].flags & I2C_M_RD)); } From 56350fb8978bbf4aafe08f21234e161dd128b417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Nov 2017 21:41:57 +0200 Subject: [PATCH 215/406] drm/i915: Prevent zero length "index" write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware always writes one or two bytes in the index portion of an indexed transfer. Make sure the message we send as the index doesn't have a zero length. Cc: stable@vger.kernel.org Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit bb9e0d4bca50f429152e74a459160b41f3d60fb2) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_i2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 8affd47b98b81..49fdf09f9919c 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -439,7 +439,8 @@ gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && - !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && + !(msgs[i].flags & I2C_M_RD) && + (msgs[i].len == 1 || msgs[i].len == 2) && (msgs[i + 1].flags & I2C_M_RD)); } From a45b30a6c5db631e2ba680304bd5edd0cd1f9643 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Nov 2017 19:41:55 +0000 Subject: [PATCH 216/406] drm/i915/fbdev: Serialise early hotplug events with async fbdev config As both the hotplug event and fbdev configuration run asynchronously, it is possible for them to run concurrently. If configuration fails, we were freeing the fbdev causing a use-after-free in the hotplug event. <7>[ 3069.935211] [drm:intel_fb_initial_config [i915]] Not using firmware configuration <7>[ 3069.935225] [drm:drm_setup_crtcs] looking for cmdline mode on connector 77 <7>[ 3069.935229] [drm:drm_setup_crtcs] looking for preferred mode on connector 77 0 <7>[ 3069.935233] [drm:drm_setup_crtcs] found mode 3200x1800 <7>[ 3069.935236] [drm:drm_setup_crtcs] picking CRTCs for 8192x8192 config <7>[ 3069.935253] [drm:drm_setup_crtcs] desired mode 3200x1800 set on crtc 43 (0,0) <7>[ 3069.935323] [drm:intelfb_create [i915]] no BIOS fb, allocating a new one <4>[ 3069.967737] general protection fault: 0000 [#1] PREEMPT SMP <0>[ 3069.977453] --------------------------------- <4>[ 3069.977457] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm r8169 mei_me mii prime_numbers mei i2c_hid pinctrl_geminilake pinctrl_intel [last unloaded: i915] <4>[ 3069.977492] CPU: 1 PID: 15414 Comm: kworker/1:0 Tainted: G U 4.14.0-CI-CI_DRM_3388+ #1 <4>[ 3069.977497] Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 <4>[ 3069.977508] Workqueue: events output_poll_execute <4>[ 3069.977512] task: ffff880177734e40 task.stack: ffffc90001fe4000 <4>[ 3069.977519] RIP: 0010:__lock_acquire+0x109/0x1b60 <4>[ 3069.977523] RSP: 0018:ffffc90001fe7bb0 EFLAGS: 00010002 <4>[ 3069.977526] RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000282 RCX: 0000000000000000 <4>[ 3069.977530] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880170d4efd0 <4>[ 3069.977534] RBP: ffffc90001fe7c70 R08: 0000000000000001 R09: 0000000000000000 <4>[ 3069.977538] R10: 0000000000000000 R11: ffffffff81899609 R12: ffff880170d4efd0 <4>[ 3069.977542] R13: ffff880177734e40 R14: 0000000000000001 R15: 0000000000000000 <4>[ 3069.977547] FS: 0000000000000000(0000) GS:ffff88017fc80000(0000) knlGS:0000000000000000 <4>[ 3069.977551] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 3069.977555] CR2: 00007f7e8b7bcf04 CR3: 0000000003e0f000 CR4: 00000000003406e0 <4>[ 3069.977559] Call Trace: <4>[ 3069.977565] ? mark_held_locks+0x64/0x90 <4>[ 3069.977571] ? _raw_spin_unlock_irq+0x24/0x50 <4>[ 3069.977575] ? _raw_spin_unlock_irq+0x24/0x50 <4>[ 3069.977579] ? trace_hardirqs_on_caller+0xde/0x1c0 <4>[ 3069.977583] ? _raw_spin_unlock_irq+0x2f/0x50 <4>[ 3069.977588] ? finish_task_switch+0xa5/0x210 <4>[ 3069.977592] ? lock_acquire+0xaf/0x200 <4>[ 3069.977596] lock_acquire+0xaf/0x200 <4>[ 3069.977600] ? __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977604] _raw_spin_lock+0x2a/0x40 <4>[ 3069.977608] ? __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977612] __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977616] ? drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977621] ? drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977625] drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977630] output_poll_execute+0x8d/0x180 <4>[ 3069.977635] process_one_work+0x22e/0x660 <4>[ 3069.977640] worker_thread+0x48/0x3a0 <4>[ 3069.977644] ? _raw_spin_unlock_irqrestore+0x4c/0x60 <4>[ 3069.977649] kthread+0x102/0x140 <4>[ 3069.977653] ? process_one_work+0x660/0x660 <4>[ 3069.977657] ? kthread_create_on_node+0x40/0x40 <4>[ 3069.977662] ret_from_fork+0x27/0x40 <4>[ 3069.977666] Code: 8d 62 f8 c3 49 81 3c 24 e0 fa 3c 82 41 be 00 00 00 00 45 0f 45 f0 83 fe 01 77 86 89 f0 49 8b 44 c4 08 48 85 c0 0f 84 76 ff ff ff ff 80 38 01 00 00 8b 1d 62 f9 e8 01 45 8b 85 b8 08 00 00 85 <1>[ 3069.977707] RIP: __lock_acquire+0x109/0x1b60 RSP: ffffc90001fe7bb0 <4>[ 3069.977712] ---[ end trace 4ad012eb3af62df7 ]--- In order to keep the dev_priv->ifbdev alive after failure, we have to avoid the free and leave it empty until we unload the module (which is less than ideal, but a necessary evil for simplicity). Then we can use intel_fbdev_sync() to serialise the hotplug event with the configuration. The serialisation between the two was removed in commit 934458c2c95d ("Revert "drm/i915: Fix races on fbdev""), but the use after free is much older, commit 366e39b4d2c5 ("drm/i915: Tear down fbdev if initialization fails") Fixes: 366e39b4d2c5 ("drm/i915: Tear down fbdev if initialization fails") Fixes: 934458c2c95d ("Revert "drm/i915: Fix races on fbdev"") Signed-off-by: Chris Wilson Cc: Lukas Wunner Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: stable@vger.kernel.org Reviewed-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/20171125194155.355-1-chris@chris-wilson.co.uk (cherry picked from commit ad88d7fc6c032ddfb32b8d496a070ab71de3a64f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index b8af35187d226..ea96682568e88 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -697,10 +697,8 @@ static void intel_fbdev_initial_config(void *data, async_cookie_t cookie) /* Due to peculiar init order wrt to hpd handling this is separate. */ if (drm_fb_helper_initial_config(&ifbdev->helper, - ifbdev->preferred_bpp)) { + ifbdev->preferred_bpp)) intel_fbdev_unregister(to_i915(ifbdev->helper.dev)); - intel_fbdev_fini(to_i915(ifbdev->helper.dev)); - } } void intel_fbdev_initial_config_async(struct drm_device *dev) @@ -800,7 +798,11 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; - if (ifbdev) + if (!ifbdev) + return; + + intel_fbdev_sync(ifbdev); + if (ifbdev->vma) drm_fb_helper_hotplug_event(&ifbdev->helper); } From ac29fc66855b79c2960c63a4a66952d5b721d698 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Nov 2017 16:10:27 +0100 Subject: [PATCH 217/406] drm/i915: fix intel_backlight_device_register declaration The alternative intel_backlight_device_register() definition apparently never got used, but I have now run into a case of i915 being compiled without CONFIG_BACKLIGHT_CLASS_DEVICE, resulting in a number of identical warnings: drivers/gpu/drm/i915/intel_drv.h:1739:12: error: 'intel_backlight_device_register' defined but not used [-Werror=unused-function] This marks the function as 'inline', which was surely the original intention here. Fixes: 1ebaa0b9c2d4 ("drm/i915: Move backlight registration to connector registration") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171127151239.1813673-1-arnd@arndb.de (cherry picked from commit 2de2d0b063b08becb2c67a2c338c44e37bdcffee) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7bc60c848940f..6c7f8bca574eb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1736,7 +1736,7 @@ extern struct drm_display_mode *intel_find_panel_downclock( int intel_backlight_device_register(struct intel_connector *connector); void intel_backlight_device_unregister(struct intel_connector *connector); #else /* CONFIG_BACKLIGHT_CLASS_DEVICE */ -static int intel_backlight_device_register(struct intel_connector *connector) +static inline int intel_backlight_device_register(struct intel_connector *connector) { return 0; } From 679fd3ebabc23dec33d42525b19479f16f355e61 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Nov 2017 14:58:31 +0800 Subject: [PATCH 218/406] drm/i915/gvt: Fix unsafe locking caused by spin_unlock_bh The caller of shadow_context_status_change may disable irqs. So it is not safe to use spin_unlock_bh in such context. Let's switch to irqsave version for safety. ------------[ cut here ]------------ WARNING: CPU: 2 PID: 4504 at kernel/softirq.c:161 __local_bh_enable_ip+0x46/0x60 [ 168.797710] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.2.8 01/26/2016 [ 168.797712] task: ffff8c693d22db80 task.stack: ffffb51b482bc000 [ 168.797718] RIP: 0010:__local_bh_enable_ip+0x46/0x60 [ 168.797721] RSP: 0018:ffffb51b482bfa10 EFLAGS: 00010046 [ 168.797724] RAX: 0000000000000046 RBX: ffff8c6900278000 RCX: 00000000ffffffff [ 168.797726] RDX: 0000000000000001 RSI: 0000000000000200 RDI: ffffffffc06a0330 [ 168.797728] RBP: ffffb51b482bfa10 R08: 0000000000000000 R09: ffff8c690027cb90 [ 168.797730] R10: ffffb51b482bfa40 R11: 00000004072f0001 R12: 0000000000000000 [ 168.797732] R13: 0000000000000000 R14: ffff8c690027ca9c R15: 0000000000000000 [ 168.797735] FS: 00007ff187c56700(0000) GS:ffff8c6959d00000(0000) knlGS:0000000000000000 [ 168.797738] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 168.797740] CR2: 0000562bc0c3991f CR3: 0000000430614006 CR4: 00000000003606e0 [ 168.797742] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 168.797744] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 168.797745] Call Trace: [ 168.797755] _raw_spin_unlock_bh+0x1e/0x20 [ 168.797826] shadow_context_status_change+0x120/0x1e0 [i915] [ 168.797831] notifier_call_chain+0x4a/0x70 [ 168.797834] atomic_notifier_call_chain+0x1a/0x20 [ 168.797896] execlists_cancel_port_requests+0x4f/0x80 [i915] [ 168.797956] reset_common_ring+0x30/0x100 [i915] [ 168.798007] i915_gem_reset_engine+0x114/0x330 [i915] [ 168.798060] ? i915_gem_retire_requests+0x75/0x180 [i915] [ 168.798111] i915_gem_reset+0x3e/0xb0 [i915] [ 168.798149] i915_reset+0x10b/0x1c0 [i915] [ 168.798187] i915_reset_device+0x209/0x220 [i915] [ 168.798225] ? gen8_gt_irq_ack+0x170/0x170 [i915] [ 168.798229] ? __queue_work+0x430/0x430 [ 168.798270] i915_handle_error+0x285/0x420 [i915] [ 168.798275] ? mntput+0x24/0x40 [ 168.798281] ? terminate_walk+0x8e/0xf0 [ 168.798328] i915_wedged_set+0x84/0xc0 [i915] [ 168.798333] simple_attr_write+0xab/0xc0 [ 168.798337] full_proxy_write+0x54/0x90 [ 168.798343] __vfs_write+0x37/0x170 [ 168.798349] ? common_file_perm+0x4c/0x100 [ 168.798355] ? apparmor_file_permission+0x1a/0x20 [ 168.798361] ? security_file_permission+0x3b/0xc0 [ 168.798365] vfs_write+0xb8/0x1b0 [ 168.798370] SyS_write+0x55/0xc0 [ 168.798376] entry_SYSCALL_64_fastpath+0x1e/0xa9 Fixes: 0e86cc9 ("drm/i915/gvt: implement per-vm mmio switching optimization") Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f6ded475bb2cc..f031234290f6b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -140,9 +140,10 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; enum intel_engine_id ring_id = req->engine->id; struct intel_vgpu_workload *workload; + unsigned long flags; if (!is_gvt_request(req)) { - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (action == INTEL_CONTEXT_SCHEDULE_IN && scheduler->engine_owner[ring_id]) { /* Switch ring from vGPU to host. */ @@ -150,7 +151,7 @@ static int shadow_context_status_change(struct notifier_block *nb, NULL, ring_id); scheduler->engine_owner[ring_id] = NULL; } - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); return NOTIFY_OK; } @@ -161,7 +162,7 @@ static int shadow_context_status_change(struct notifier_block *nb, switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (workload->vgpu != scheduler->engine_owner[ring_id]) { /* Switch ring from host to vGPU or vGPU to vGPU. */ intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], @@ -170,7 +171,7 @@ static int shadow_context_status_change(struct notifier_block *nb, } else gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n", ring_id, workload->vgpu->id); - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: From bf3a26b3cba59e2eee91cec3de6fdb4e8e670295 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Tue, 21 Nov 2017 10:54:41 +0800 Subject: [PATCH 219/406] drm/i915/gvt: remove skl_misc_ctl_write handler With different settings of compressed data hash mode between VMs and host may cause gpu issues. Commit: 1999f108c ("drm/i915/gvt: Disable compression workaround for Gen9") disable compression workaround of guest in gvt host to align with host. Commit: 93564044f ("drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS") add compression workaround, then we can remove the skl_misc_ctl_write hanlder. Better solution should be always keeping same settings as host, and bypass the write request from VMs, but it need to fetch data from host's "Context". Cc: Zhi Wang Signed-off-by: Weinan Li Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 45 ++++------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a5bed2e71b926..44cd5ff5e97da 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1381,40 +1381,6 @@ static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes); } -static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u32 v = *(u32 *)p_data; - - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) - return intel_vgpu_default_mmio_write(vgpu, - offset, p_data, bytes); - - switch (offset) { - case 0x4ddc: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 31); - break; - case 0x42080: - /* bypass WaCompressedResourceDisplayNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 15); - break; - case 0xe194: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 8); - break; - case 0x7014: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 13); - break; - default: - return -EINVAL; - } - - return 0; -} - static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1671,8 +1637,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2564,8 +2530,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x6e570, D_BDW_PLUS); MMIO_D(0x65f10, D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2615,8 +2580,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL); + MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL); MMIO_D(0x45504, D_SKL_PLUS); MMIO_D(0x45520, D_SKL_PLUS); MMIO_D(0x46000, D_SKL_PLUS); From c3c80f0736bb2767fab983d1ae53252b2ddd405d Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 14 Nov 2017 17:09:35 +0800 Subject: [PATCH 220/406] drm/i915/gvt: Move request alloc to dispatch_workload path only Previously the performance is improved through the workload auditing and shadowing ahead of vGPU scheduling, however, there is the case that more requests are allocated in submit_context before the previous request is added, the timeline will hold its seqno which is later. This patch is to move the request alloc to dispatch_workload function, where is the same place as request is added. It will fix the issue of kernel BUG for (timeline->seqno != request->fence.seqno) check when add_request. Fixes: 89ea20b930cb ("drm/i915/gvt: Factor out scan and shadow from workload dispatch") Signed-off-by: Chuanxiao Dong Signed-off-by: fred gao Signed-off-by: Zhenyu Wang (cherry picked from commit f2880e04f3a5419366926182fc97a3c2e4fd8f2a) --- drivers/gpu/drm/i915/gvt/execlist.c | 6 ++++++ drivers/gpu/drm/i915/gvt/scheduler.c | 24 ++++++++++++++++++++---- drivers/gpu/drm/i915/gvt/scheduler.h | 3 +++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 4427be18e4a93..940cdaaa3f245 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -496,6 +496,12 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) goto err_unpin_mm; } + ret = intel_gvt_generate_request(workload); + if (ret) { + gvt_vgpu_err("fail to generate request\n"); + goto err_unpin_mm; + } + ret = prepare_shadow_batch_buffer(workload); if (ret) { gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f031234290f6b..3ac1dc97a7a06 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -254,7 +254,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_ring *ring; int ret; @@ -300,6 +299,26 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = populate_shadow_context(workload); if (ret) goto err_unpin; + workload->shadowed = true; + return 0; + +err_unpin: + engine->context_unpin(engine, shadow_ctx); +err_shadow: + release_shadow_wa_ctx(&workload->wa_ctx); +err_scan: + return ret; +} + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload) +{ + int ring_id = workload->ring_id; + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; + struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; + int ret; rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { @@ -314,14 +333,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; - workload->shadowed = true; return 0; err_unpin: engine->context_unpin(engine, shadow_ctx); -err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: return ret; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2d694f6c09076..b9f872204d7e7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -142,4 +142,7 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu); void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu); void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload); + #endif From 7e60590208942de856fd43956ccebd19e71d686f Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 16 Nov 2017 16:54:23 +0800 Subject: [PATCH 221/406] drm/i915/gvt: enabled pipe A default on creating vgpu when i915 driver unloading, it will shutdown all CRTCs and it will introudce kernel panic when conducting igt drv_module_reload test case under guest environment (bug reported by XENGT-468) as below: BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 IP: intel_edp_backlight_off+0xe/0x7c [i915] RIP: 0010:intel_edp_backlight_off+0xe/0x7c [i915] Call Trace: intel_disable_ddi+0xb3/0xbc [i915] intel_modeset_setup_hw_state+0x654/0xb4c [i915] intel_modeset_init+0x9f1/0xe69 [i915] ? intel_i2c_reset+0x3d/0x40 [i915] ? intel_setup_gmbus+0xba/0x249 [i915] i915_driver_load+0xae5/0xcc0 [i915] i915_pci_probe+0x3a/0x3c [i915] local_pci_probe+0x38/0x7b pci_device_probe+0xec/0x12b driver_probe_device+0x134/0x294 __driver_attach+0x6a/0x8c ? driver_probe_device+0x294/0x294 bus_for_each_dev+0x68/0x80 driver_attach+0x19/0x1b bus_add_driver+0xea/0x1d3 ? 0xffffffffa03cd000 driver_register+0x85/0xc1 ? 0xffffffffa03cd000 __pci_register_driver+0x55/0x57 i915_init+0x57/0x5a [i915] do_one_initcall+0x8a/0x12e ? __vunmap+0x8d/0x93 ? kmem_cache_alloc_trace+0x96/0x11c do_init_module+0x5a/0x1e1 in this case, active connector detected but no active pipe available, so it will hang to disable connector. to fix, on vgpu creating, to report active pipe available for guest. Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 3c318439a6596..355120865efd1 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -282,6 +282,7 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -307,6 +308,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); + vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; return 0; } From b721b65af4eb46df6a1d9e34b14003225e403565 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 28 Nov 2017 07:29:54 +0800 Subject: [PATCH 222/406] drm/i915/gvt: Correct ADDR_4K/2M/1G_MASK definition For ADDR_4K_MASK, bit[45..12] should be 1, all other bits should be 0. The current definition wrongly set bit[46] as 1 also. This path fixes this. v2: Add commit message, fixes and cc stable.(Zhenyu) Fixes: 2707e4446688("drm/i915/gvt: vGPU graphics memory virtualization") Signed-off-by: Xiong Zhang Cc: stable@vger.kernel.org Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2801d70579d8c..8e331142badbc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -311,9 +311,9 @@ static inline int gtt_set_entry64(void *pt, #define GTT_HAW 46 -#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30) -#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21) -#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12) +#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30) +#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21) +#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12) static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { From 69fc6cbbac542c349b3d350d10f6e394c253c81d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:24 +0800 Subject: [PATCH 223/406] btrfs: tree-checker: Fix false panic for sanity test [BUG] If we run btrfs with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y, it will instantly cause kernel panic like: ------ ... assertion failed: 0, file: fs/btrfs/disk-io.c, line: 3853 ... Call Trace: btrfs_mark_buffer_dirty+0x187/0x1f0 [btrfs] setup_items_for_insert+0x385/0x650 [btrfs] __btrfs_drop_extents+0x129a/0x1870 [btrfs] ... ----- [Cause] Btrfs will call btrfs_check_leaf() in btrfs_mark_buffer_dirty() to check if the leaf is valid with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y. However quite some btrfs_mark_buffer_dirty() callers(*) don't really initialize its item data but only initialize its item pointers, leaving item data uninitialized. This makes tree-checker catch uninitialized data as error, causing such panic. *: These callers include but not limited to setup_items_for_insert() btrfs_split_item() btrfs_expand_item() [Fix] Add a new parameter @check_item_data to btrfs_check_leaf(). With @check_item_data set to false, item data check will be skipped and fallback to old btrfs_check_leaf() behavior. So we can still get early warning if we screw up item pointers, and avoid false panic. Cc: Filipe Manana Reported-by: Lakshmipathi.G Signed-off-by: Qu Wenruo Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 10 ++++++++-- fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++----- fs/btrfs/tree-checker.h | 14 +++++++++++++- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index efce9a2fa9be0..10a2a579cc7f6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -610,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } @@ -3848,7 +3848,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { + /* + * Since btrfs_mark_buffer_dirty() can be called with item pointer set + * but item data not updated. + * So here we should only check item pointers, not item data. + */ + if (btrfs_header_level(buf) == 0 && + btrfs_check_leaf_relaxed(root, buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 114fc5f0ecc5e..ce4ed6ec8f392 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -242,7 +242,8 @@ static int check_leaf_item(struct btrfs_root *root, return ret; } -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) +static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, + bool check_item_data) { struct btrfs_fs_info *fs_info = root->fs_info; /* No valid key type is 0, so all key should be larger than this key */ @@ -361,10 +362,15 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return -EUCLEAN; } - /* Check if the item size and content meet other criteria */ - ret = check_leaf_item(root, leaf, &key, slot); - if (ret < 0) - return ret; + if (check_item_data) { + /* + * Check if the item size and content meet other + * criteria + */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + } prev_key.objectid = key.objectid; prev_key.type = key.type; @@ -374,6 +380,17 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return 0; } +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, true); +} + +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, false); +} + int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) { unsigned long nr = btrfs_header_nritems(node); diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 96c486e95d704..3d53e8d6fda0c 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -20,7 +20,19 @@ #include "ctree.h" #include "extent_io.h" -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); +/* + * Comprehensive leaf checker. + * Will check not only the item pointers, but also every possible member + * in item data. + */ +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); + +/* + * Less strict leaf checker. + * Will only check item pointers, not reading item data. + */ +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); #endif From 57c77ce47063bd298e31ffcf7875ed0c2633bf8d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:11 +0100 Subject: [PATCH 224/406] mlxsw: spectrum_router: Offload decap only for up tunnels When a new local route is added, an IPIP entry is looked up to determine whether the route should be offloaded as a tunnel decap or as a trap. That decision should take into account whether the tunnel netdevice in question is actually IFF_UP, and only install a decap offload if it is. Fixes: 0063587d3587 ("mlxsw: spectrum: Support decap-only IP-in-IP tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 632c7b229054c..c529e31fc78b7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3996,7 +3996,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, case RTN_LOCAL: ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4, dip); - if (ipip_entry) { + if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry, From cab43d9c877456d2d8feb77335327316d3de9871 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:12 +0100 Subject: [PATCH 225/406] mlxsw: spectrum_router: Demote tunnels on VRF migration The mlxsw driver currently doesn't offload GRE tunnels if they have the same local address and use the same underlay VRF. When such a situation arises, the tunnels in conflict are demoted to slow path. However, the current code only verifies this condition on tunnel creation and tunnel change, not when a tunnel is moved to a different VRF. When the tunnel has no bound device, underlay and overlay are the same. Thus moving a tunnel moves the underlay as well, and that can cause local address conflict. So modify mlxsw_sp_netdevice_ipip_ol_vrf_event() to check if there are any conflicting tunnels, and demote them if yes. Fixes: af641713e97d ("mlxsw: spectrum_router: Onload conflicting tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c529e31fc78b7..a23e452f7261b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1449,9 +1449,27 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + u32 ul_tb_id; if (!ipip_entry) return 0; + + /* For flat configuration cases, moving overlay to a different VRF might + * cause local address conflict, and the conflicting tunnels need to be + * demoted. + */ + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, true, false, false, extack); } From d97cda5f465bacc82659263a885703d73759ea04 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:13 +0100 Subject: [PATCH 226/406] mlxsw: spectrum_router: Handle encap to demoted tunnels Some tunnels that are offloadable on their own can nonetheless be demoted to slow path if their local address is in conflict with that of another tunnel. When a route is formed for such a tunnel, mlxsw_sp_nexthop_ipip_init() fails to find the corresponding IPIP entry, and that triggers a FIB abort. Resolve the problem by not assuming that a tunnel for which mlxsw_sp_ipip_ops.can_offload() holds also automatically has an IPIP entry. Fixes: af641713e97d ("mlxsw: spectrum_router: Onload conflicting tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a23e452f7261b..17e2153a8dc9c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3361,22 +3361,19 @@ static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) return ul_dev ? (ul_dev->flags & IFF_UP) : true; } -static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh, - struct net_device *ol_dev) +static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_ipip_entry *ipip_entry) { bool removing; if (!nh->nh_grp->gateway || nh->ipip_entry) - return 0; - - nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - if (!nh->ipip_entry) - return -ENOENT; + return; - removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev); + nh->ipip_entry = ipip_entry; + removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - return 0; + mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -3421,21 +3418,21 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct fib_nh *fib_nh) { - struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; struct net_device *dev = fib_nh->nh_dev; - enum mlxsw_sp_ipip_type ipipt; + struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif *rif; int err; - if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) && - router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); - if (err) - return err; - mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); - return 0; + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); + if (ipip_entry) { + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + if (ipip_ops->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); + return 0; + } } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; @@ -4712,21 +4709,21 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct rt6_info *rt) { - struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_ipip_entry *ipip_entry; struct net_device *dev = rt->dst.dev; - enum mlxsw_sp_ipip_type ipipt; struct mlxsw_sp_rif *rif; int err; - if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) && - router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV6)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); - if (err) - return err; - mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); - return 0; + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); + if (ipip_entry) { + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + if (ipip_ops->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV6)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); + return 0; + } } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; From 09dbf6297fa8bb4433d9293623b464750d874685 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:14 +0100 Subject: [PATCH 227/406] mlxsw: spectrum_router: Update nexthop RIF on update The function mlxsw_sp_nexthop_rif_update() walks the list of nexthops associated with a RIF, and updates the corresponding entries in the switch. It is used in particular when a tunnel underlay netdevice moves to a different VRF, and all the nexthops are migrated over to a new RIF. The problem is that each nexthop holds a reference to its RIF, and that is not updated. So after the old RIF is gone, further activity on these nexthops (such as downing the underlay netdevice) dereferences a dangling pointer. Fix the issue by updating rif of impacted nexthops before calling mlxsw_sp_nexthop_rif_update(). Fixes: 0c5f1cd5ba8c ("mlxsw: spectrum_router: Generalize __mlxsw_sp_ipip_entry_update_tunnel()") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 17e2153a8dc9c..72ef4f8025f00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1370,8 +1370,9 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif); static int mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1389,17 +1390,18 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(new_lb_rif); ipip_entry->ol_lb = new_lb_rif; - if (keep_encap) { - list_splice_init(&old_lb_rif->common.nexthop_list, - &new_lb_rif->common.nexthop_list); - mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common); - } + if (keep_encap) + mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common, + &new_lb_rif->common); mlxsw_sp_rif_destroy(&old_lb_rif->common); return 0; } +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + /** * Update the offload related to an IPIP entry. This always updates decap, and * in addition to that it also: @@ -3560,6 +3562,18 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif) +{ + struct mlxsw_sp_nexthop *nh; + + list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); + list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) + nh->rif = new_rif; + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); +} + static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { From 1a6152d36dee08da2be2a3030dceb45ef680460a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 28 Nov 2017 23:01:44 +0800 Subject: [PATCH 228/406] quota: propagate error from __dquot_initialize In commit 6184fc0b8dd7 ("quota: Propagate error from ->acquire_dquot()"), we have propagated error from __dquot_initialize to caller, but we forgot to handle such error in add_dquot_ref(), so, currently, during quota accounting information initialization flow, if we failed for some of inodes, we just ignore such error, and do account for others, which is not a good implementation. In this patch, we choose to let user be aware of such error, so after turning on quota successfully, we can make sure all inodes disk usage can be accounted, which will be more reasonable. Suggested-by: Jan Kara Signed-off-by: Chao Yu Signed-off-by: Jan Kara --- fs/quota/dquot.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39f1b0b0c76fb..d38684ff2ebb8 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -941,12 +941,13 @@ static int dqinit_needed(struct inode *inode, int type) } /* This routine is guarded by s_umount semaphore */ -static void add_dquot_ref(struct super_block *sb, int type) +static int add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif + int err = 0; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { @@ -966,7 +967,11 @@ static void add_dquot_ref(struct super_block *sb, int type) reserved = 1; #endif iput(old_inode); - __dquot_initialize(inode, type); + err = __dquot_initialize(inode, type); + if (err) { + iput(inode); + goto out; + } /* * We hold a reference to 'inode' so it couldn't have been @@ -981,7 +986,7 @@ static void add_dquot_ref(struct super_block *sb, int type) } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); - +out: #ifdef CONFIG_QUOTA_DEBUG if (reserved) { quota_error(sb, "Writes happened before quota was turned on " @@ -989,6 +994,7 @@ static void add_dquot_ref(struct super_block *sb, int type) "Please run quotacheck(8)"); } #endif + return err; } /* @@ -2379,10 +2385,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); - add_dquot_ref(sb, type); - - return 0; + error = add_dquot_ref(sb, type); + if (error) + dquot_disable(sb, type, flags); + return error; out_file_init: dqopt->files[type] = NULL; iput(inode); From ba2d8d887d962c2f790e6dc01b2fd25b4608720b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:19:48 +0100 Subject: [PATCH 229/406] net: mvpp2: fix the txq_init error path When an allocation in the txq_init path fails, the allocated buffers end-up being freed twice: in the txq_init error path, and in txq_deinit. This lead to issues as txq_deinit would work on already freed memory regions: kernel BUG at mm/slub.c:3915! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP This patch fixes this by removing the txq_init own error path, as the txq_deinit function is always called on errors. This was introduced by TSO as way more buffers are allocated. Fixes: 186cd4d4e414 ("net: mvpp2: software tso support") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 6c20e811f9732..79f01cd80dd7f 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5805,7 +5805,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, sizeof(*txq_pcpu->buffs), GFP_KERNEL); if (!txq_pcpu->buffs) - goto cleanup; + return -ENOMEM; txq_pcpu->count = 0; txq_pcpu->reserved_num = 0; @@ -5821,26 +5821,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port, &txq_pcpu->tso_headers_dma, GFP_KERNEL); if (!txq_pcpu->tso_headers) - goto cleanup; + return -ENOMEM; } return 0; -cleanup: - for_each_present_cpu(cpu) { - txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->buffs); - - dma_free_coherent(port->dev->dev.parent, - txq_pcpu->size * TSO_HEADER_SIZE, - txq_pcpu->tso_headers, - txq_pcpu->tso_headers_dma); - } - - dma_free_coherent(port->dev->dev.parent, - txq->size * MVPP2_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_dma); - - return -ENOMEM; } /* Free allocated TXQ resources */ From 26146b0e6b6869c6cd8a45ab3a4a5562e7a91b23 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:19:49 +0100 Subject: [PATCH 230/406] net: mvpp2: cleanup probed ports in the probe error path This patches fixes the probe error path by cleaning up probed ports, to avoid leaving registered net devices when the driver failed to probe. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 79f01cd80dd7f..afae4fe00965b 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -8329,7 +8329,7 @@ static int mvpp2_probe(struct platform_device *pdev) for_each_available_child_of_node(dn, port_node) { err = mvpp2_port_probe(pdev, port_node, priv, i); if (err < 0) - goto err_mg_clk; + goto err_port_probe; i++; } @@ -8345,12 +8345,19 @@ static int mvpp2_probe(struct platform_device *pdev) priv->stats_queue = create_singlethread_workqueue(priv->queue_name); if (!priv->stats_queue) { err = -ENOMEM; - goto err_mg_clk; + goto err_port_probe; } platform_set_drvdata(pdev, priv); return 0; +err_port_probe: + i = 0; + for_each_available_child_of_node(dn, port_node) { + if (priv->port_list[i]) + mvpp2_port_remove(priv->port_list[i]); + i++; + } err_mg_clk: clk_disable_unprepare(priv->axi_clk); if (priv->hw_version == MVPP22) From e749aca84b10f3987b2ee1f76e0c7d8aacc5653c Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Tue, 28 Nov 2017 14:19:50 +0100 Subject: [PATCH 231/406] net: mvpp2: do not disable GMAC padding Short fragmented packets may never be sent by the hardware when padding is disabled. This patch stop modifying the GMAC padding bits, to leave them to their reset value (disabled). Fixes: 3919357fb0bb ("net: mvpp2: initialize the GMAC when using a port") Signed-off-by: Yan Markman [Antoine: commit message] Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index afae4fe00965b..5be58b04b95e8 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -4629,11 +4629,6 @@ static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port) MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL; writel(val, port->base + MVPP22_GMAC_CTRL_4_REG); - - val = readl(port->base + MVPP2_GMAC_CTRL_2_REG); - val |= MVPP2_GMAC_DISABLE_PADDING; - val &= ~MVPP2_GMAC_FLOW_CTRL_MASK; - writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); } else if (phy_interface_mode_is_rgmii(port->phy_interface)) { val = readl(port->base + MVPP22_GMAC_CTRL_4_REG); val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL | @@ -4641,10 +4636,6 @@ static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port) MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; val &= ~MVPP22_CTRL4_DP_CLK_SEL; writel(val, port->base + MVPP22_GMAC_CTRL_4_REG); - - val = readl(port->base + MVPP2_GMAC_CTRL_2_REG); - val &= ~MVPP2_GMAC_DISABLE_PADDING; - writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); } /* The port is connected to a copper PHY */ From 76e583c5f50ef539caea6935d37af3595034befb Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:19:51 +0100 Subject: [PATCH 232/406] net: mvpp2: check ethtool sets the Tx ring size is to a valid min value This patch fixes the Tx ring size checks when using ethtool, by adding an extra check in the PPv2 check_ringparam_valid helper. The Tx ring size cannot be set to a value smaller than the minimum number of descriptors needed for TSO. Fixes: 1d17db08c056 ("net: mvpp2: limit TSO segments and use stop/wake thresholds") Suggested-by: Yan Markman Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 5be58b04b95e8..d83a78be98a2c 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6842,6 +6842,12 @@ static int mvpp2_check_ringparam_valid(struct net_device *dev, else if (!IS_ALIGNED(ring->tx_pending, 32)) new_tx_pending = ALIGN(ring->tx_pending, 32); + /* The Tx ring size cannot be smaller than the minimum number of + * descriptors needed for TSO. + */ + if (new_tx_pending < MVPP2_MAX_SKB_DESCS) + new_tx_pending = ALIGN(MVPP2_MAX_SKB_DESCS, 32); + if (ring->rx_pending != new_rx_pending) { netdev_info(dev, "illegal Rx ring size value %d, round to %d\n", ring->rx_pending, new_rx_pending); From 952b6b3b07877419386e719ff20917170e1ce684 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:26:30 +0100 Subject: [PATCH 233/406] net: phy: marvell10g: fix the PHY id mask The Marvell 10G PHY driver supports different hardware revisions, which have their bits 3..0 differing. To get the correct revision number these bits should be ignored. This patch fixes this by using the already defined MARVELL_PHY_ID_MASK (0xfffffff0) instead of the custom 0xffffffff mask. Fixes: 20b2af32ff3f ("net: phy: add Marvell Alaska X 88X3310 10Gigabit PHY support") Suggested-by: Yan Markman Signed-off-by: Antoine Tenart Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index aebc08beceba3..21b3f36e023a9 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -16,6 +16,7 @@ * link takes priority and the other port is completely locked out. */ #include +#include enum { MV_PCS_BASE_T = 0x0000, @@ -338,7 +339,7 @@ static int mv3310_read_status(struct phy_device *phydev) static struct phy_driver mv3310_drivers[] = { { .phy_id = 0x002b09aa, - .phy_id_mask = 0xffffffff, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "mv88x3310", .features = SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Full | @@ -360,7 +361,7 @@ static struct phy_driver mv3310_drivers[] = { module_phy_driver(mv3310_drivers); static struct mdio_device_id __maybe_unused mv3310_tbl[] = { - { 0x002b09aa, 0xffffffff }, + { 0x002b09aa, MARVELL_PHY_ID_MASK }, { }, }; MODULE_DEVICE_TABLE(mdio, mv3310_tbl); From dea521a2b9f96e905fa2bb2f95e23ec00c2ec436 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Nov 2017 20:46:49 +0100 Subject: [PATCH 234/406] bnxt_en: Fix an error handling path in 'bnxt_get_module_eeprom()' Error code returned by 'bnxt_read_sfp_module_eeprom_info()' is handled a few lines above when reading the A0 portion of the EEPROM. The same should be done when reading the A2 portion of the EEPROM. In order to correctly propagate an error, update 'rc' in this 2nd call as well, otherwise 0 (success) is returned. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7ce1d4b7e67de..b13ce5ebde8d9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2136,8 +2136,8 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, start, - length, data); + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + start, length, data); } return rc; } From f95d5bf03b5ec0d7ea8e552e15abe70c646249b5 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Wed, 22 Nov 2017 16:29:57 +0100 Subject: [PATCH 235/406] lmc: Use memdup_user() as a cleanup Fix coccicheck warning which recommends to use memdup_user(): drivers/net/wan/lmc/lmc_main.c:497:27-34: WARNING opportunity for memdup_user Generated by: scripts/coccinelle/memdup_user/memdup_user.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: David S. Miller --- drivers/net/wan/lmc/lmc_main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 37b1e0d03e31d..90a4ad9a2d081 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -494,18 +494,11 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ break; } - data = kmalloc(xc.len, GFP_KERNEL); - if (!data) { - ret = -ENOMEM; + data = memdup_user(xc.data, xc.len); + if (IS_ERR(data)) { + ret = PTR_ERR(data); break; } - - if(copy_from_user(data, xc.data, xc.len)) - { - kfree(data); - ret = -ENOMEM; - break; - } printk("%s: Starting load of data Len: %d at 0x%p == 0x%p\n", dev->name, xc.len, xc.data, data); From 08f46070dde2a835a7a5bfd4c70372c27bff5d88 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:16:06 +0800 Subject: [PATCH 236/406] sctp: force SCTP_ERROR_INV_STRM with __u32 when calling sctp_chunk_fail This patch is to force SCTP_ERROR_INV_STRM with right type to fit in sctp_chunk_fail to avoid the sparse error. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index a20145b3a9494..76ea66be0bbee 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, */ /* Mark as failed send. */ - sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); + sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM); if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; From af2697a0273f7665429c47d71ab26f2412af924e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:16:07 +0800 Subject: [PATCH 237/406] sctp: force the params with right types for sctp csum apis Now sctp_csum_xxx doesn't really match the param types of these common csum apis. As sctp_csum_xxx is defined in sctp/checksum.h, many sparse errors occur when make C=2 not only with M=net/sctp but also with other modules that include this header file. This patch is to force them fit in csum apis with the right types. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/checksum.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 4a5b9a306c69b..32ee65a30aff1 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -48,31 +48,32 @@ static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) /* This uses the crypto implementation of crc32c, which is either * implemented w/ hardware support or resolves to __crc32c_le(). */ - return crc32c(sum, buff, len); + return (__force __wsum)crc32c((__force __u32)sum, buff, len); } static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { - return __crc32c_le_combine(csum, csum2, len); + return (__force __wsum)__crc32c_le_combine((__force __u32)csum, + (__force __u32)csum2, len); } static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = sctp_hdr(skb); - __le32 ret, old = sh->checksum; const struct skb_checksum_ops ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, }; + __le32 old = sh->checksum; + __wsum new; sh->checksum = 0; - ret = cpu_to_le32(~__skb_checksum(skb, offset, skb->len - offset, - ~(__u32)0, &ops)); + new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops); sh->checksum = old; - return ret; + return cpu_to_le32((__force __u32)new); } #endif /* __sctp_checksum_h__ */ From 1ba896f6f52bfafac6dec4ca583cdd9a073858e8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:16:08 +0800 Subject: [PATCH 238/406] sctp: remove extern from stream sched Now each stream sched ops is defined in different .c file and added into the global ops in another .c file, it uses extern to make this work. However extern is not good coding style to get them in and even make C=2 reports errors for this. This patch adds sctp_sched_ops_xxx_init for each stream sched ops in their .c file, then get them into the global ops by calling them when initializing sctp module. Fixes: 637784ade221 ("sctp: introduce priority based stream scheduler") Fixes: ac1ed8b82cd6 ("sctp: introduce round robin stream scheduler") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 5 +++++ include/net/sctp/stream_sched.h | 5 +++++ net/sctp/protocol.c | 1 + net/sctp/stream_sched.c | 25 ++++++++++++++++++------- net/sctp/stream_sched_prio.c | 7 ++++++- net/sctp/stream_sched_rr.c | 7 ++++++- 6 files changed, 41 insertions(+), 9 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 749a428824371..906a9c0efa714 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -194,6 +194,11 @@ void sctp_remaddr_proc_exit(struct net *net); */ int sctp_offload_init(void); +/* + * sctp/stream_sched.c + */ +void sctp_sched_ops_init(void); + /* * sctp/stream.c */ diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index c676550a4c7dd..5c5da48f65e7c 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -69,4 +69,9 @@ void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); +void sctp_sched_ops_register(enum sctp_sched_type sched, + struct sctp_sched_ops *sched_ops); +void sctp_sched_ops_prio_init(void); +void sctp_sched_ops_rr_init(void); + #endif /* __sctp_stream_sched_h__ */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f5172c21349be..6a38c25036498 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1499,6 +1499,7 @@ static __init int sctp_init(void) INIT_LIST_HEAD(&sctp_address_families); sctp_v4_pf_init(); sctp_v6_pf_init(); + sctp_sched_ops_init(); status = register_pernet_subsys(&sctp_defaults_ops); if (status) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 0b83ec51e43b0..d8c162a4089ca 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .unsched_all = sctp_sched_fcfs_unsched_all, }; +static void sctp_sched_ops_fcfs_init(void) +{ + sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs); +} + /* API to other parts of the stack */ -extern struct sctp_sched_ops sctp_sched_prio; -extern struct sctp_sched_ops sctp_sched_rr; +static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; -static struct sctp_sched_ops *sctp_sched_ops[] = { - &sctp_sched_fcfs, - &sctp_sched_prio, - &sctp_sched_rr, -}; +void sctp_sched_ops_register(enum sctp_sched_type sched, + struct sctp_sched_ops *sched_ops) +{ + sctp_sched_ops[sched] = sched_ops; +} + +void sctp_sched_ops_init(void) +{ + sctp_sched_ops_fcfs_init(); + sctp_sched_ops_prio_init(); + sctp_sched_ops_rr_init(); +} int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 384dbf3c87609..7997d35dd0fdf 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) sctp_sched_prio_unsched(soute); } -struct sctp_sched_ops sctp_sched_prio = { +static struct sctp_sched_ops sctp_sched_prio = { .set = sctp_sched_prio_set, .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, @@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = { .sched_all = sctp_sched_prio_sched_all, .unsched_all = sctp_sched_prio_unsched_all, }; + +void sctp_sched_ops_prio_init(void) +{ + sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio); +} diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index 7612a438c5b93..1155692448f1a 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) sctp_sched_rr_unsched(stream, soute); } -struct sctp_sched_ops sctp_sched_rr = { +static struct sctp_sched_ops sctp_sched_rr = { .set = sctp_sched_rr_set, .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, @@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = { .sched_all = sctp_sched_rr_sched_all, .unsched_all = sctp_sched_rr_unsched_all, }; + +void sctp_sched_ops_rr_init(void) +{ + sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr); +} From 57f015f5eccf25fd4a3336fe3cbbee920a8fba6f Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Tue, 28 Nov 2017 10:44:29 -0500 Subject: [PATCH 239/406] packet: fix crash in fanout_demux_rollover() syzkaller found a race condition fanout_demux_rollover() while removing a packet socket from a fanout group. po->rollover is read and operated on during packet_rcv_fanout(), via fanout_demux_rollover(), but the pointer is currently cleared before the synchronization in packet_release(). It is safer to delay the cleanup until after synchronize_net() has been called, ensuring all calls to packet_rcv_fanout() for this socket have finished. To further simplify synchronization around the rollover structure, set po->rollover in fanout_add() only if there are no errors. This removes the need for rcu in the struct and in the call to packet_getsockopt(..., PACKET_ROLLOVER_STATS, ...). Crashing stack trace: fanout_demux_rollover+0xb6/0x4d0 net/packet/af_packet.c:1392 packet_rcv_fanout+0x649/0x7c8 net/packet/af_packet.c:1487 dev_queue_xmit_nit+0x835/0xc10 net/core/dev.c:1953 xmit_one net/core/dev.c:2975 [inline] dev_hard_start_xmit+0x16b/0xac0 net/core/dev.c:2995 __dev_queue_xmit+0x17a4/0x2050 net/core/dev.c:3476 dev_queue_xmit+0x17/0x20 net/core/dev.c:3509 neigh_connected_output+0x489/0x720 net/core/neighbour.c:1379 neigh_output include/net/neighbour.h:482 [inline] ip6_finish_output2+0xad1/0x22a0 net/ipv6/ip6_output.c:120 ip6_finish_output+0x2f9/0x920 net/ipv6/ip6_output.c:146 NF_HOOK_COND include/linux/netfilter.h:239 [inline] ip6_output+0x1f4/0x850 net/ipv6/ip6_output.c:163 dst_output include/net/dst.h:459 [inline] NF_HOOK.constprop.35+0xff/0x630 include/linux/netfilter.h:250 mld_sendpack+0x6a8/0xcc0 net/ipv6/mcast.c:1660 mld_send_initial_cr.part.24+0x103/0x150 net/ipv6/mcast.c:2072 mld_send_initial_cr net/ipv6/mcast.c:2056 [inline] ipv6_mc_dad_complete+0x99/0x130 net/ipv6/mcast.c:2079 addrconf_dad_completed+0x595/0x970 net/ipv6/addrconf.c:4039 addrconf_dad_work+0xac9/0x1160 net/ipv6/addrconf.c:3971 process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113 worker_thread+0x223/0x1990 kernel/workqueue.c:2247 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432 Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state") Fixes: 509c7a1ecc860 ("packet: avoid panic in packet_getsockopt()") Reported-by: syzbot Signed-off-by: Mike Maloney Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 32 ++++++++++---------------------- net/packet/internal.h | 1 - 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 737092ca9b4ee..1b7bb9d9865e2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1687,7 +1687,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) atomic_long_set(&rollover->num, 0); atomic_long_set(&rollover->num_huge, 0); atomic_long_set(&rollover->num_failed, 0); - po->rollover = rollover; } if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { @@ -1745,6 +1744,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); po->fanout = match; + po->rollover = rollover; + rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); __fanout_link(sk, po); err = 0; @@ -1758,10 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } out: - if (err && rollover) { - kfree_rcu(rollover, rcu); - po->rollover = NULL; - } + kfree(rollover); mutex_unlock(&fanout_mutex); return err; } @@ -1785,11 +1783,6 @@ static struct packet_fanout *fanout_release(struct sock *sk) list_del(&f->list); else f = NULL; - - if (po->rollover) { - kfree_rcu(po->rollover, rcu); - po->rollover = NULL; - } } mutex_unlock(&fanout_mutex); @@ -3029,6 +3022,7 @@ static int packet_release(struct socket *sock) synchronize_net(); if (f) { + kfree(po->rollover); fanout_release_data(f); kfree(f); } @@ -3843,7 +3837,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; - struct packet_rollover *rollover; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3922,18 +3915,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_ROLLOVER_STATS: - rcu_read_lock(); - rollover = rcu_dereference(po->rollover); - if (rollover) { - rstats.tp_all = atomic_long_read(&rollover->num); - rstats.tp_huge = atomic_long_read(&rollover->num_huge); - rstats.tp_failed = atomic_long_read(&rollover->num_failed); - data = &rstats; - lv = sizeof(rstats); - } - rcu_read_unlock(); - if (!rollover) + if (!po->rollover) return -EINVAL; + rstats.tp_all = atomic_long_read(&po->rollover->num); + rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); + rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; diff --git a/net/packet/internal.h b/net/packet/internal.h index 562fbc1550063..a1d2b2319ae99 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -95,7 +95,6 @@ struct packet_fanout { struct packet_rollover { int sock; - struct rcu_head rcu; atomic_long_t num; atomic_long_t num_huge; atomic_long_t num_failed; From 15fe076edea787807a7cdc168df832544b58eba6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Nov 2017 08:03:30 -0800 Subject: [PATCH 240/406] net/packet: fix a race in packet_bind() and packet_notifier() syzbot reported crashes [1] and provided a C repro easing bug hunting. When/if packet_do_bind() calls __unregister_prot_hook() and releases po->bind_lock, another thread can run packet_notifier() and process an NETDEV_UP event. This calls register_prot_hook() and hooks again the socket right before first thread is able to grab again po->bind_lock. Fixes this issue by temporarily setting po->num to 0, as suggested by David Miller. [1] dev_remove_pack: ffff8801bf16fa80 not found ------------[ cut here ]------------ kernel BUG at net/core/dev.c:7945! ( BUG_ON(!list_empty(&dev->ptype_all)); ) invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: device syz0 entered promiscuous mode CPU: 0 PID: 3161 Comm: syzkaller404108 Not tainted 4.14.0+ #190 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801cc57a500 task.stack: ffff8801cc588000 RIP: 0010:netdev_run_todo+0x772/0xae0 net/core/dev.c:7945 RSP: 0018:ffff8801cc58f598 EFLAGS: 00010293 RAX: ffff8801cc57a500 RBX: dffffc0000000000 RCX: ffffffff841f75b2 RDX: 0000000000000000 RSI: 1ffff100398b1ede RDI: ffff8801bf1f8810 device syz0 entered promiscuous mode RBP: ffff8801cc58f898 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801bf1f8cd8 R13: ffff8801cc58f870 R14: ffff8801bf1f8780 R15: ffff8801cc58f7f0 FS: 0000000001716880(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020b13000 CR3: 0000000005e25000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:106 tun_detach drivers/net/tun.c:670 [inline] tun_chr_close+0x49/0x60 drivers/net/tun.c:2845 __fput+0x333/0x7f0 fs/file_table.c:210 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x199/0x270 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x9bb/0x1ae0 kernel/exit.c:865 do_group_exit+0x149/0x400 kernel/exit.c:968 SYSC_exit_group kernel/exit.c:979 [inline] SyS_exit_group+0x1d/0x20 kernel/exit.c:977 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x44ad19 Fixes: 30f7ea1c2b5f ("packet: race condition in packet_bind") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Francesco Ruggeri Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1b7bb9d9865e2..da215e5c13992 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3091,6 +3091,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { if (po->running) { rcu_read_unlock(); + /* prevents packet_notifier() from calling + * register_prot_hook() + */ + po->num = 0; __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@ -3099,6 +3103,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } + BUG_ON(po->running); po->num = proto; po->prot_hook.type = proto; From ea37d5998b50a72b9045ba60a132eeb20e1c4230 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Nov 2017 01:54:00 +0000 Subject: [PATCH 241/406] Btrfs: incremental send, fix wrong unlink path after renaming file Under some circumstances, an incremental send operation can issue wrong paths for unlink commands related to files that have multiple hard links and some (or all) of those links were renamed between the parent and send snapshots. Consider the following example: Parent snapshot . (ino 256) |---- a/ (ino 257) | |---- b/ (ino 259) | | |---- c/ (ino 260) | | |---- f2 (ino 261) | | | |---- f2l1 (ino 261) | |---- d/ (ino 262) |---- f1l1_2 (ino 258) |---- f2l2 (ino 261) |---- f1_2 (ino 258) Send snapshot . (ino 256) |---- a/ (ino 257) | |---- f2l1/ (ino 263) | |---- b2/ (ino 259) | |---- c/ (ino 260) | | |---- d3 (ino 262) | | |---- f1l1_2 (ino 258) | | |---- f2l2_2 (ino 261) | | |---- f1_2 (ino 258) | | | |---- f2 (ino 261) | |---- f1l2 (ino 258) | |---- d (ino 261) When computing the incremental send stream the following steps happen: 1) When processing inode 261, a rename operation is issued that renames inode 262, which currently as a path of "d", to an orphan name of "o262-7-0". This is done because in the send snapshot, inode 261 has of its hard links with a path of "d" as well. 2) Two link operations are issued that create the new hard links for inode 261, whose names are "d" and "f2l2_2", at paths "/" and "o262-7-0/" respectively. 3) Still while processing inode 261, unlink operations are issued to remove the old hard links of inode 261, with names "f2l1" and "f2l2", at paths "a/" and "d/". However path "d/" does not correspond anymore to the directory inode 262 but corresponds instead to a hard link of inode 261 (link command issued in the previous step). This makes the receiver fail with a ENOTDIR error when attempting the unlink operation. The problem happens because before sending the unlink operation, we failed to detect that inode 262 was one of ancestors for inode 261 in the parent snapshot, and therefore we didn't recompute the path for inode 262 before issuing the unlink operation for the link named "f2l2" of inode 262. The detection failed because the function "is_ancestor()" only follows the first hard link it finds for an inode instead of all of its hard links (as it was originally created for being used with directories only, for which only one hard link exists). So fix this by making "is_ancestor()" follow all hard links of the input inode. A test case for fstests follows soon. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 124 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 106 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c10e4c70f02d1..20d3300bd2689 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3521,7 +3521,40 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, } /* - * Check if ino ino1 is an ancestor of inode ino2 in the given root. + * Check if inode ino2, or any of its ancestors, is inode ino1. + * Return 1 if true, 0 if false and < 0 on error. + */ +static int check_ino_in_path(struct btrfs_root *root, + const u64 ino1, + const u64 ino1_gen, + const u64 ino2, + const u64 ino2_gen, + struct fs_path *fs_path) +{ + u64 ino = ino2; + + if (ino1 == ino2) + return ino1_gen == ino2_gen; + + while (ino > BTRFS_FIRST_FREE_OBJECTID) { + u64 parent; + u64 parent_gen; + int ret; + + fs_path_reset(fs_path); + ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); + if (ret < 0) + return ret; + if (parent == ino1) + return parent_gen == ino1_gen; + ino = parent; + } + return 0; +} + +/* + * Check if ino ino1 is an ancestor of inode ino2 in the given root for any + * possible path (in case ino2 is not a directory and has multiple hard links). * Return 1 if true, 0 if false and < 0 on error. */ static int is_ancestor(struct btrfs_root *root, @@ -3530,36 +3563,91 @@ static int is_ancestor(struct btrfs_root *root, const u64 ino2, struct fs_path *fs_path) { - u64 ino = ino2; - bool free_path = false; + bool free_fs_path = false; int ret = 0; + struct btrfs_path *path = NULL; + struct btrfs_key key; if (!fs_path) { fs_path = fs_path_alloc(); if (!fs_path) return -ENOMEM; - free_path = true; + free_fs_path = true; } - while (ino > BTRFS_FIRST_FREE_OBJECTID) { - u64 parent; - u64 parent_gen; + path = alloc_path_for_send(); + if (!path) { + ret = -ENOMEM; + goto out; + } - fs_path_reset(fs_path); - ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); - if (ret < 0) { - if (ret == -ENOENT && ino == ino2) - ret = 0; - goto out; + key.objectid = ino2; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + while (true) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + u32 cur_offset = 0; + u32 item_size; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + continue; } - if (parent == ino1) { - ret = parent_gen == ino1_gen ? 1 : 0; - goto out; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != ino2) + break; + if (key.type != BTRFS_INODE_REF_KEY && + key.type != BTRFS_INODE_EXTREF_KEY) + break; + + item_size = btrfs_item_size_nr(leaf, slot); + while (cur_offset < item_size) { + u64 parent; + u64 parent_gen; + + if (key.type == BTRFS_INODE_EXTREF_KEY) { + unsigned long ptr; + struct btrfs_inode_extref *extref; + + ptr = btrfs_item_ptr_offset(leaf, slot); + extref = (struct btrfs_inode_extref *) + (ptr + cur_offset); + parent = btrfs_inode_extref_parent(leaf, + extref); + cur_offset += sizeof(*extref); + cur_offset += btrfs_inode_extref_name_len(leaf, + extref); + } else { + parent = key.offset; + cur_offset = item_size; + } + + ret = get_inode_info(root, parent, NULL, &parent_gen, + NULL, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = check_ino_in_path(root, ino1, ino1_gen, + parent, parent_gen, fs_path); + if (ret) + goto out; } - ino = parent; + path->slots[0]++; } + ret = 0; out: - if (free_path) + btrfs_free_path(path); + if (free_fs_path) fs_path_free(fs_path); return ret; } From 9d0ca444d0b317d31acf6f8fc18ac6f478518924 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Nov 2017 17:20:53 +0100 Subject: [PATCH 242/406] s390/gs: add compat regset for the guarded storage broadcast control block git commit e525f8a6e696210d15f8b8277d4da12fc4add299 "s390/gs: add regset for the guarded storage broadcast control block" added the missing regset to the s390_regsets array but failed to add it to the s390_compat_regsets array. Fixes: e525f8a6e696 ("add compat regset for the guarded storage broadcast control block") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 26c0523c14882..cd3df5514552c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1650,6 +1650,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_gs_cb_get, .set = s390_gs_cb_set, }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, { .core_note_type = NT_S390_RI_CB, .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), From eb1bd249ba016284ed762d87c1989dd822500773 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 28 Nov 2017 18:28:44 +0200 Subject: [PATCH 243/406] nvme-rdma: fix memory leak during queue allocation In case nvme_rdma_wait_for_cm timeout expires before we get an established or rejected event (rdma_connect succeeded) from rdma_cm, we end up with leaking the ib transport resources for dedicated queue. This scenario can easily reproduced using traffic test during port toggling. Also, in order to protect from parallel ib queue destruction, that may be invoked from different context's, introduce new flag that stands for transport readiness. While we're here, protect also against a situation that we can receive rdma_cm events during ib queue destruction. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 02ef0771f6b90..37af56596be6c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,6 +77,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_ALLOCATED = 0, NVME_RDMA_Q_LIVE = 1, + NVME_RDMA_Q_TR_READY = 2, }; struct nvme_rdma_queue { @@ -390,12 +391,23 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + + if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags)) + return; + + dev = queue->device; + ibdev = dev->dev; ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); - rdma_destroy_qp(queue->cm_id); + /* + * The cm_id object might have been destroyed during RDMA connection + * establishment error flow to avoid getting other cma events, thus + * the destruction of the QP shouldn't use rdma_cm API. + */ + ib_destroy_qp(queue->qp); ib_free_cq(queue->ib_cq); nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, @@ -463,6 +475,8 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_ring; } + set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); + return 0; out_destroy_ring: @@ -529,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, out_destroy_cm_id: rdma_destroy_id(queue->cm_id); + nvme_rdma_destroy_queue_ib(queue); return ret; } From 7e5dd57ef3081ff6c03908d786ed5087f6fbb7ae Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 25 Nov 2017 03:03:00 +0900 Subject: [PATCH 244/406] nvme-pci: fix NULL pointer dereference in nvme_free_host_mem() Following condition which will cause NULL pointer dereference will occur in nvme_free_host_mem() when it tries to remove pci device via nvme_remove() especially after a failure of host memory allocation for HMB. "(host_mem_descs == NULL) && (nr_host_mem_descs != 0)" It's because __nr_host_mem_descs__ is not cleared to 0 unlike __host_mem_descs__ is so. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 617374762b7c5..f5800c3c9082a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1759,6 +1759,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev) dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; + dev->nr_host_mem_descs = 0; } static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, From d210a9874b8f6166579408131cb74495caff1958 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 23 Nov 2017 17:13:40 +0100 Subject: [PATCH 245/406] xfs: fortify xfs_alloc_buftarg error handling percpu_counter_init failure path doesn't clean up &btp->bt_lru list. Call list_lru_destroy in that error path. Similarly register_shrinker error path is not handled. While it is unlikely to trigger these error path, it is not impossible especially the later might fail with large NUMAs. Let's handle the failure to make the code more robust. Noticed-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4db6e8d780f69..4c6e86d861fda 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1815,22 +1815,27 @@ xfs_alloc_buftarg( btp->bt_daxdev = dax_dev; if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } From d41c6172bd4031979eab722c265a2e5764383c3c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:32 -0800 Subject: [PATCH 246/406] xfs: fix leaks on corruption errors in xfs_bmap.c Use _GOTO instead of _RETURN so we can free the allocated cursor on error. Fixes: bf80628 ("xfs: remove xfs_bmse_shift_one") Fixes-coverity-id: 1423813, 1423676 Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 08df809e23152..1210f684d3c28 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents( *done = true; goto del_cursor; } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); new_startoff = got.br_startoff - offset_shift_fsb; if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { @@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents( goto del_cursor; } } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); if (stop_fsb >= got.br_startoff + got.br_blockcount) { error = -EIO; From eda6bc27ccc852d34393739009486932f3ba70ae Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:32 -0800 Subject: [PATCH 247/406] xfs: fix uninitialized variable in xfs_scrub_quota On the first pass through the while(1) loop, we get to xfs_scrub_should_terminate() which can test the uninitialized error variable. Fixes-coverity-id: 1423737 Fixes: c2fc338c ("xfs: scrub quota information") Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 8e58ba8429464..613def9692a12 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -207,7 +207,7 @@ xfs_scrub_quota( xfs_dqid_t id = 0; uint dqtype; int nimaps; - int error; + int error = 0; if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return -ENOENT; From 712d361d59efa6349a9538f4fd9a49073f0e8127 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:33 -0800 Subject: [PATCH 248/406] xfs: calculate correct offset in xfs_scrub_quota_item It's only used for tracepoints so it's relatively harmless, but the offset is calculated incorrectly in xfs_scrub_quota_item. qi_dqperchunk is the nr. of dquots per "chunk" which we have conveniently *cough* defined to always be 1 FSB. Therefore block_offset * qi_dqperchunk == first id in that chunk, and so offset = id / qi_dqperchunk id * dqperchunk is ... meaningless. Fixes-coverity-id: 1423965 Fixes: c2fc338c ("xfs: scrub quota information") Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 613def9692a12..3d9037eceaf1b 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -107,7 +107,7 @@ xfs_scrub_quota_item( unsigned long long rcount; xfs_ino_t fs_icount; - offset = id * qi->qi_dqperchunk; + offset = id / qi->qi_dqperchunk; /* * We fed $id and DQNEXT into the xfs_qm_dqget call, which means From 3c02a6d946657e1ae0688e0d89f2dd2cfe9afba8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Nov 2017 10:59:40 +0100 Subject: [PATCH 249/406] Revert "ALSA: usb-audio: Fix potential zero-division at parsing FU" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 8428a8ebde2d ("ALSA: usb-audio: Fix potential zero-division at parsing FU") is utterly bogus and breaks the case with csize=1 instead of fixing anything. Just take it back again. Reported-by: Jörg Otte Fixes: 8428a8ebde2d ("ALSA: usb-audio: Fix potential zero-division at parsing FU" Signed-off-by: Takashi Iwai Signed-off-by: Linus Torvalds --- sound/usb/mixer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 0537c63229908..61b348383de88 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1476,9 +1476,9 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, return -EINVAL; } csize = hdr->bControlSize; - if (csize <= 1) { + if (!csize) { usb_audio_dbg(state->chip, - "unit %u: invalid bControlSize <= 1\n", + "unit %u: invalid bControlSize == 0\n", unitid); return -EINVAL; } From f81a348728ec5ac43f3bbcf81c97d52baba253f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 21 Nov 2017 11:59:13 +0000 Subject: [PATCH 250/406] arm64: mm: cleanup stale AIVIVT references Since commit: 155433cb365ee466 ("arm64: cache: Remove support for ASID-tagged VIVT I-caches") ... the kernel no longer cares about AIVIVT I-caches, as these were removed from the architecture. This patch removes the stale references to such I-caches. The comment in flush_context() is also updated to clarify when and where the TLB invalidation occurs. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/mm/context.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 76d1cc85d5b11..955130762a3c6 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -38,7 +38,7 @@ * * See Documentation/cachetlb.txt for more information. Please note that * the implementation assumes non-aliasing VIPT D-cache and (aliasing) - * VIPT or ASID-tagged VIVT I-cache. + * VIPT I-cache. * * flush_cache_mm(mm) * diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7f..28a45a19aae7f 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -117,7 +117,10 @@ static void flush_context(unsigned int cpu) per_cpu(reserved_asids, i) = asid; } - /* Queue a TLB invalidate and flush the I-cache if necessary. */ + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ cpumask_setall(&tlb_flush_pending); } From 250dcd11466e06df64b92520e2c56bdae453581b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 27 Nov 2017 11:28:50 +0100 Subject: [PATCH 251/406] mmc: sdhci: Avoid swiotlb buffer being full The commit de3ee99b097d ("mmc: Delete bounce buffer handling") deletes the bounce buffer handling, but also causes the max_req_size for sdhci to be increased, in case when max_segs == 1. This causes errors for sdhci-pci Ricoh variant, about the swiotlb buffer to become full. Fix the issue, by taking IO_TLB_SEGSIZE and IO_TLB_SHIFT into account when deciding the max_req_size for sdhci. Reported-by: Jiri Slaby Fixes: de3ee99b097d ("mmc: Delete bounce buffer handling") Cc: # v4.14+ Signed-off-by: Ulf Hansson Tested-by: Jiri Slaby Acked-by: Adrian Hunter --- drivers/mmc/host/sdhci.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 2f14334e42df9..e9290a3439d54 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -3650,23 +3651,30 @@ int sdhci_setup_host(struct sdhci_host *host) spin_lock_init(&host->lock); + /* + * Maximum number of sectors in one transfer. Limited by SDMA boundary + * size (512KiB). Note some tuning modes impose a 4MiB limit, but this + * is less anyway. + */ + mmc->max_req_size = 524288; + /* * Maximum number of segments. Depends on if the hardware * can do scatter/gather or not. */ - if (host->flags & SDHCI_USE_ADMA) + if (host->flags & SDHCI_USE_ADMA) { mmc->max_segs = SDHCI_MAX_SEGS; - else if (host->flags & SDHCI_USE_SDMA) + } else if (host->flags & SDHCI_USE_SDMA) { mmc->max_segs = 1; - else /* PIO */ + if (swiotlb_max_segment()) { + unsigned int max_req_size = (1 << IO_TLB_SHIFT) * + IO_TLB_SEGSIZE; + mmc->max_req_size = min(mmc->max_req_size, + max_req_size); + } + } else { /* PIO */ mmc->max_segs = SDHCI_MAX_SEGS; - - /* - * Maximum number of sectors in one transfer. Limited by SDMA boundary - * size (512KiB). Note some tuning modes impose a 4MiB limit, but this - * is less anyway. - */ - mmc->max_req_size = 524288; + } /* * Maximum segment size. Could be one segment with the maximum number From 25415cec502a1232b19fffc85465882b19a90415 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Nov 2017 11:11:41 -0800 Subject: [PATCH 252/406] cls_bpf: don't decrement net's refcount when offload fails When cls_bpf offload was added it seemed like a good idea to call cls_bpf_delete_prog() instead of extending the error handling path, since the software state is fully initialized at that point. This handling of errors without jumping to the end of the function is error prone, as proven by later commit missing that extra call to __cls_bpf_delete_prog(). __cls_bpf_delete_prog() is now expected to be invoked with a reference on exts->net or the field zeroed out. The call on the offload's error patch does not fullfil this requirement, leading to each error stealing a reference on net namespace. Create a function undoing what cls_bpf_set_parms() did and use it from __cls_bpf_delete_prog() and the error path. Fixes: aae2c35ec892 ("cls_bpf: use tcf_exts_get_net() before call_rcu()") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Daniel Borkmann Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a9f3e317055c4..6fe798c2df1a5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -258,11 +258,8 @@ static int cls_bpf_init(struct tcf_proto *tp) return 0; } -static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) +static void cls_bpf_free_parms(struct cls_bpf_prog *prog) { - tcf_exts_destroy(&prog->exts); - tcf_exts_put_net(&prog->exts); - if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); else @@ -270,6 +267,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog->bpf_name); kfree(prog->bpf_ops); +} + +static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) +{ + tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); + + cls_bpf_free_parms(prog); kfree(prog); } @@ -514,12 +519,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout_idr; ret = cls_bpf_offload(tp, prog, oldprog); - if (ret) { - if (!oldprog) - idr_remove_ext(&head->handle_idr, prog->handle); - __cls_bpf_delete_prog(prog); - return ret; - } + if (ret) + goto errout_parms; if (!tc_in_hw(prog->gen_flags)) prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; @@ -537,6 +538,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = prog; return 0; +errout_parms: + cls_bpf_free_parms(prog); errout_idr: if (!oldprog) idr_remove_ext(&head->handle_idr, prog->handle); From f85729d07cd649bf69820f14bdad36326a24a699 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Nov 2017 14:28:39 +0100 Subject: [PATCH 253/406] sch_sfq: fix null pointer dereference at timer expiration While converting sch_sfq to use timer_setup(), the commit cdeabbb88134 ("net: sched: Convert timers to use timer_setup()") forgot to initialize the 'sch' field. As a result, the timer callback tries to dereference a NULL pointer, and the kernel does oops. Fix it initializing such field at qdisc creation time. Fixes: cdeabbb88134 ("net: sched: Convert timers to use timer_setup()") Signed-off-by: Paolo Abeni Acked-by: Cong Wang Acked-by: Kees Cook Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 890f4a4564e71..09c1203c17119 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -724,6 +724,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; int err; + q->sch = sch; timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); err = tcf_block_get(&q->block, &q->filter_list, sch); From a8dd397903a6e57157f6265911f7d35681364427 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:56:07 +0800 Subject: [PATCH 254/406] sctp: use right member as the param of list_for_each_entry Commit d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues when migrating a sock") made a mistake that using 'list' as the param of list_for_each_entry to traverse the retransmit, sacked and abandoned queues, while chunks are using 'transmitted_list' to link into these queues. It could cause NULL dereference panic if there are chunks in any of these queues when peeling off one asoc. So use the chunk member 'transmitted_list' instead in this patch. Fixes: d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues when migrating a sock") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3204a9b294071..014847e256481 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -188,13 +188,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, list_for_each_entry(chunk, &t->transmitted, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->retransmit, list) + list_for_each_entry(chunk, &q->retransmit, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->sacked, list) + list_for_each_entry(chunk, &q->sacked, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->abandoned, list) + list_for_each_entry(chunk, &q->abandoned, transmitted_list) cb(chunk); list_for_each_entry(chunk, &q->out_chunk_list, list) From 5eb3d22a8a05af401257e781ddeefc412bfb8542 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 21:12:09 +0800 Subject: [PATCH 255/406] bonding: use nla_get_u64 to extract the value for IFLA_BOND_AD_ACTOR_SYSTEM bond_opt_initval expects a u64 type param, it's better to use nla_get_u64 to extract the value here, to eliminate a sparse endianness mismatch warning. Fixes: 171a42c38c6e ("bonding: add netlink support for sys prio, actor sys mac, and port key") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index a1b33aa6054a8..9697977b80f04 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -423,7 +423,7 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], return -EINVAL; bond_opt_initval(&newval, - nla_get_be64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); + nla_get_u64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval); if (err) return err; From fc39c38bdc46c49e1e9166afbeb686634e63cbaf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 21:19:05 +0800 Subject: [PATCH 256/406] vxlan: use __be32 type for the param vni in __vxlan_fdb_delete All callers of __vxlan_fdb_delete pass vni with __be32 type, and this param should be declared as __be32 type. Fixes: 3ad7a4b141eb ("vxlan: support fdb and learning in COLLECT_METADATA mode") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7ac487031b4bc..19b9cc51079e7 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -874,8 +874,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, const unsigned char *addr, union vxlan_addr ip, - __be16 port, __be32 src_vni, u32 vni, u32 ifindex, - u16 vid) + __be16 port, __be32 src_vni, __be32 vni, + u32 ifindex, u16 vid) { struct vxlan_fdb *f; struct vxlan_rdst *rd = NULL; From c95c3fe5c744b05647240aaba4a163be36b7f123 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:06:10 +0000 Subject: [PATCH 257/406] ambassador: fix incorrect indentation of assignment statement Remove one extraneous level of indentation on assignment statement. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/ambassador.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index acf16c323e385..bfc514015b0bc 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -2258,7 +2258,7 @@ static int amb_probe(struct pci_dev *pci_dev, PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p", dev->atm_dev->number, dev, dev->atm_dev); - dev->atm_dev->dev_data = (void *) dev; + dev->atm_dev->dev_data = (void *) dev; // register our address amb_esi (dev, dev->atm_dev->esi); From 22dac9f1fdd576c1d03ba00f4a9db9a864a43a70 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:24:15 +0000 Subject: [PATCH 258/406] atm: fore200e: use %pK to format kernel addresses instead of %x Don't use %x and casting to print out a kernel address, instead use the %pK and remove the casting. Cleans up smatch warning: drivers/atm/fore200e.c:3093 fore200e_proc_read() warn: argument 3 to %08x specifier is cast from pointer Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/fore200e.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 126855e6cb7d2..6ebc4e4820fc4 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -3083,8 +3083,8 @@ fore200e_proc_read(struct atm_dev *dev, loff_t* pos, char* page) ASSERT(fore200e_vcc); len = sprintf(page, - " %08x %03d %05d %1d %09lu %05d/%05d %09lu %05d/%05d\n", - (u32)(unsigned long)vcc, + " %pK %03d %05d %1d %09lu %05d/%05d %09lu %05d/%05d\n", + vcc, vcc->vpi, vcc->vci, fore200e_atm2fore_aal(vcc->qos.aal), fore200e_vcc->tx_pdu, fore200e_vcc->tx_min_pdu > 0xFFFF ? 0 : fore200e_vcc->tx_min_pdu, From 4a5def7f6a758aef1a0a3b10e981881c1e914f69 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 27 Nov 2017 05:29:32 -0800 Subject: [PATCH 259/406] VSOCK: Don't set sk_state to TCP_CLOSE before testing it A recent commit (3b4477d2dcf2) converted the sk_state to use TCP constants. In that change, vmci_transport_handle_detach was changed such that sk->sk_state was set to TCP_CLOSE before we test whether it is TCP_SYN_SENT. This change moves the sk_state change back to the original locations in that function. Signed-off-by: Jorgen Hansen Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 56573dc85709f..a7a73ffe675b2 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -804,8 +804,6 @@ static void vmci_transport_handle_detach(struct sock *sk) */ if (vsk->local_addr.svm_cid == VMADDR_CID_HOST || vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = TCP_CLOSE; - if (sk->sk_state == TCP_SYN_SENT) { /* The peer may detach from a queue pair while * we are still in the connecting state, i.e., @@ -815,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk) * event like a reset. */ + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); return; } + sk->sk_state = TCP_CLOSE; } sk->sk_state_change(sk); } From 6c9065427072b4e726a2c17f9aa2448d87a92097 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:39:32 +0000 Subject: [PATCH 260/406] atm: lanai: use %p to format kernel addresses instead of %x Don't use %x and casting to print out a kernel address, instead use %p and remove the casting. Cleans up smatch warnings: drivers/atm/lanai.c:1589 service_buffer_allocate() warn: argument 2 to %08lX specifier is cast from pointer drivers/atm/lanai.c:2221 lanai_dev_open() warn: argument 4 to %lx specifier is cast from pointer Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/lanai.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 2351dad78ff58..7e4c2ea3e3f26 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1586,8 +1586,8 @@ static int service_buffer_allocate(struct lanai_dev *lanai) lanai->pci); if (unlikely(lanai->service.start == NULL)) return -ENOMEM; - DPRINTK("allocated service buffer at 0x%08lX, size %zu(%d)\n", - (unsigned long) lanai->service.start, + DPRINTK("allocated service buffer at %p, size %zu(%d)\n", + lanai->service.start, lanai_buf_size(&lanai->service), lanai_buf_size_cardorder(&lanai->service)); /* Clear ServWrite register to be safe */ @@ -2220,9 +2220,9 @@ static int lanai_dev_open(struct atm_dev *atmdev) #endif memcpy(atmdev->esi, eeprom_mac(lanai), ESI_LEN); lanai_timed_poll_start(lanai); - printk(KERN_NOTICE DEV_LABEL "(itf %d): rev.%d, base=0x%lx, irq=%u " + printk(KERN_NOTICE DEV_LABEL "(itf %d): rev.%d, base=%p, irq=%u " "(%pMF)\n", lanai->number, (int) lanai->pci->revision, - (unsigned long) lanai->base, lanai->pci->irq, atmdev->esi); + lanai->base, lanai->pci->irq, atmdev->esi); printk(KERN_NOTICE DEV_LABEL "(itf %d): LANAI%s, serialno=%u(0x%X), " "board_rev=%d\n", lanai->number, lanai->type==lanai2 ? "2" : "HB", (unsigned int) lanai->serialno, From 0195a21079c077abfb475a56830b06b37424982a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:47:22 +0000 Subject: [PATCH 261/406] atm: suni: remove extraneous space to fix indentation Remove a leading space, fixes indentation Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/suni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c index b8825f2d79e02..4b044710a8cf3 100644 --- a/drivers/atm/suni.c +++ b/drivers/atm/suni.c @@ -177,7 +177,7 @@ static int set_loopback(struct atm_dev *dev,int mode) default: return -EINVAL; } - dev->ops->phy_put(dev, control, reg); + dev->ops->phy_put(dev, control, reg); PRIV(dev)->loop_mode = mode; return 0; } From d51aae68b142f48232257e96ce317db25445418d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 27 Nov 2017 18:37:21 +0100 Subject: [PATCH 262/406] net: sched: cbq: create block for q->link.block q->link.block is not initialized, that leads to EINVAL when one tries to add filter there. So initialize it properly. This can be reproduced by: $ tc qdisc add dev eth0 root handle 1: cbq avpkt 1000 rate 1000Mbit bandwidth 1000Mbit $ tc filter add dev eth0 parent 1: protocol ip prio 100 u32 match ip protocol 0 0x00 flowid 1:1 Reported-by: Jaroslav Aster Reported-by: Ivan Vecera Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Signed-off-by: Jiri Pirko Acked-by: Eelco Chaudron Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6361be7881f10..525eb3a6d6251 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1158,9 +1158,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) return -EINVAL; + err = tcf_block_get(&q->link.block, &q->link.filter_list, sch); + if (err) + goto put_rtab; + err = qdisc_class_hash_init(&q->clhash); if (err < 0) - goto put_rtab; + goto put_block; q->link.sibling = &q->link; q->link.common.classid = sch->handle; @@ -1194,6 +1198,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) cbq_addprio(q, &q->link); return 0; +put_block: + tcf_block_put(q->link.block); + put_rtab: qdisc_put_rtab(q->link.R_tab); return err; From 4650d02ad2d9b2c1c7aa36055166db6aee68f72e Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 14 Nov 2017 11:35:37 -0800 Subject: [PATCH 263/406] RISC-V: Remove unused arguments from ATOMIC_OP Our atomics are generated from a complicated series of preprocessor macros, each of which is slightly different from the last. When writing the macros I'd accidentally left some unused arguments floating around. This patch removes the unused macro arguments. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 94 ++++++++++++++++----------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index e2e37c57cbeb2..40c73dd59c153 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i) * have the AQ or RL bits set. These don't return anything, so there's only * one version to worry about. */ -#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." #asm_type " zero, %1, %0" \ - : "+A" (v->counter) \ - : "r" (I) \ - : "memory"); \ +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + __asm__ __volatile__ ( \ + "amo" #asm_op "." #asm_type " zero, %1, %0" \ + : "+A" (v->counter) \ + : "r" (I) \ + : "memory"); \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) \ + ATOMIC_OP (op, asm_op, I, d, long, 64) #endif -ATOMIC_OPS(add, add, +, i) -ATOMIC_OPS(sub, add, +, -i) -ATOMIC_OPS(and, and, &, i) -ATOMIC_OPS( or, or, |, i) -ATOMIC_OPS(xor, xor, ^, i) +ATOMIC_OPS(add, add, i) +ATOMIC_OPS(sub, add, -i) +ATOMIC_OPS(and, and, i) +ATOMIC_OPS( or, or, i) +ATOMIC_OPS(xor, xor, i) #undef ATOMIC_OP #undef ATOMIC_OPS @@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i) * There's two flavors of these: the arithmatic ops have both fetch and return * versions, while the logical ops only have fetch versions. */ -#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ { \ register c_type ret; \ @@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) #endif @@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl, ) #undef ATOMIC_OPS #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64) #endif -ATOMIC_OPS(and, and, &, i, , _relaxed) -ATOMIC_OPS(and, and, &, i, .aq , _acquire) -ATOMIC_OPS(and, and, &, i, .rl , _release) -ATOMIC_OPS(and, and, &, i, .aqrl, ) +ATOMIC_OPS(and, and, i, , _relaxed) +ATOMIC_OPS(and, and, i, .aq , _acquire) +ATOMIC_OPS(and, and, i, .rl , _release) +ATOMIC_OPS(and, and, i, .aqrl, ) -ATOMIC_OPS( or, or, |, i, , _relaxed) -ATOMIC_OPS( or, or, |, i, .aq , _acquire) -ATOMIC_OPS( or, or, |, i, .rl , _release) -ATOMIC_OPS( or, or, |, i, .aqrl, ) +ATOMIC_OPS( or, or, i, , _relaxed) +ATOMIC_OPS( or, or, i, .aq , _acquire) +ATOMIC_OPS( or, or, i, .rl , _release) +ATOMIC_OPS( or, or, i, .aqrl, ) -ATOMIC_OPS(xor, xor, ^, i, , _relaxed) -ATOMIC_OPS(xor, xor, ^, i, .aq , _acquire) -ATOMIC_OPS(xor, xor, ^, i, .rl , _release) -ATOMIC_OPS(xor, xor, ^, i, .aqrl, ) +ATOMIC_OPS(xor, xor, i, , _relaxed) +ATOMIC_OPS(xor, xor, i, .aq , _acquire) +ATOMIC_OPS(xor, xor, i, .rl , _release) +ATOMIC_OPS(xor, xor, i, .aqrl, ) #undef ATOMIC_OPS @@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0) #undef ATOMIC_OP #undef ATOMIC_OPS -#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ { \ atomic##prefix##_##func_op(I, v); \ } -#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ { \ return atomic##prefix##_fetch_##func_op(I, v); \ @@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, long, 64) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64) \ + ATOMIC_OP (op, asm_op, I, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) #endif From 8286d51a6c244738aeb071fcd7d2e36a3374e150 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:02:50 -0800 Subject: [PATCH 264/406] RISC-V: Comment on why {,cmp}xchg is ordered how it is This is another memory model FIXME. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index 40c73dd59c153..e65d1cd89e28b 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v) /* * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a barrier. We just - * use the other implementations directly. + * {cmp,}xchg and the operations that return, so they need a barrier. + */ +/* + * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by + * assigning the same barrier to both the LR and SC operations, but that might + * not make any sense. We're waiting on a memory model specification to + * determine exactly what the right thing to do is here. */ #define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ From 61a60d35b7d1b0b3a31bc21d15805a3654f60920 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:03:48 -0800 Subject: [PATCH 265/406] RISC-V: Remove __smp_bp__{before,after}_atomic These duplicate the asm-generic definitions are therefor aren't useful. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 183534b7c39b7..455ee16127fbc 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,21 +38,6 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) -/* - * These fences exist to enforce ordering around the relaxed AMOs. The - * documentation defines that - * " - * atomic_fetch_add(); - * is equivalent to: - * smp_mb__before_atomic(); - * atomic_fetch_add_relaxed(); - * smp_mb__after_atomic(); - * " - * So we emit full fences on both sides. - */ -#define __smb_mb__before_atomic() smp_mb() -#define __smb_mb__after_atomic() smp_mb() - /* * These barriers prevent accesses performed outside a spinlock from being moved * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only From 3343eb6806f365b9e3d451040671fa9336e57513 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:03:55 -0800 Subject: [PATCH 266/406] RISC-V: Remove smb_mb__{before,after}_spinlock() These are obselete. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 455ee16127fbc..773c4e039cd72 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,14 +38,6 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) -/* - * These barriers prevent accesses performed outside a spinlock from being moved - * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only - * enforce release consistency, we need full fences here. - */ -#define smb_mb__before_spinlock() smp_mb() -#define smb_mb__after_spinlock() smp_mb() - #include #endif /* __ASSEMBLY__ */ From 9347ce54cd699db92d37e66191aa4b9a0a92304e Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:04:05 -0800 Subject: [PATCH 267/406] RISC-V: __test_and_op_bit_ord should be strongly ordered I mis-read the documentation. After looking at it again the documentation is actually as clear as it can be, it's just that I didn't actually read it in order and therefor did the wrong thing. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 7c281ef1d5832..f30daf26f08f4 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -67,7 +67,7 @@ : "memory"); #define __test_and_op_bit(op, mod, nr, addr) \ - __test_and_op_bit_ord(op, mod, nr, addr, ) + __test_and_op_bit_ord(op, mod, nr, addr, .aqrl) #define __op_bit(op, mod, nr, addr) \ __op_bit_ord(op, mod, nr, addr, ) From 21db403660d1433b8a02b26d5d4084921b857c40 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:05:04 -0800 Subject: [PATCH 268/406] RISC-V: Add READ_ONCE in arch_spin_is_locked() This was just incorrect in the original version. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index 04c71d938afdb..a6a005c4f2fb4 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -24,7 +24,7 @@ /* FIXME: Replace this with a ticket lock, like MIPS. */ -#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0) static inline void arch_spin_unlock(arch_spinlock_t *lock) { From c901e45a999a1935d7adf653e1cf12dfbcd737aa Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:06:17 -0800 Subject: [PATCH 269/406] RISC-V: `sfence.vma` orderes the instruction cache This is just a comment change, but it's one that bit me on the mailing list. It turns out that issuing a `sfence.vma` enforces instruction cache ordering in addition to TLB ordering. This isn't explicitly called out in the ISA manual, but Andrew will be making that more clear in a future revision. CC: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 5ee4ae370b5e5..c79fab3d377d3 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,7 +17,10 @@ #ifdef CONFIG_MMU -/* Flush entire local TLB */ +/* + * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction + * cache as well, so a 'fence.i' is not necessary. + */ static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); From bf730552734372e45b10fe056726de1950fdfdde Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:06:31 -0800 Subject: [PATCH 270/406] RISC-V: remove spin_unlock_wait() This was removed from the other architectures in commit 952111d7db02 ("arch: Remove spin_unlock_wait() arch-specific definitions"). That landed between when we got upstream and when our patches were reviewed, so this is a followup patch. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/spinlock.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index a6a005c4f2fb4..2fd27e8ef1fd6 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) } } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_rmb(); - do { - cpu_relax(); - } while (arch_spin_is_locked(lock)); - smp_acquire__after_ctrl_dep(); -} - /***********************************************************/ static inline void arch_read_lock(arch_rwlock_t *lock) From b43aaee69d4327d05e7624d9471c17d015b4d67d Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 21 Nov 2017 09:08:07 -0500 Subject: [PATCH 271/406] drm/amdgpu: move UVD/VCE and VCN structure out from union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the enablement of VCN Dec and Enc from user space, User space queries kernel for the IP information, if HW has UVD/VCE, the info comes from these IP blocks, but this could end up mis-interpret for VCN when they are in the union, the other way same when HW with VCN block. Signed-off-by: Leo Liu Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Fixes: 95d0906f8506 ("drm/amdgpu: add initial vcn support and decode tests") Cc: stable@vger.kernel.org Reviewed-and-Tested-by: Michel Dänzer --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5afaf6016b4a6..c25cedff4915d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1572,18 +1572,14 @@ struct amdgpu_device { /* sdma */ struct amdgpu_sdma sdma; - union { - struct { - /* uvd */ - struct amdgpu_uvd uvd; - - /* vce */ - struct amdgpu_vce vce; - }; - - /* vcn */ - struct amdgpu_vcn vcn; - }; + /* uvd */ + struct amdgpu_uvd uvd; + + /* vce */ + struct amdgpu_vce vce; + + /* vcn */ + struct amdgpu_vcn vcn; /* firmwares */ struct amdgpu_firmware firmware; From ed162fe7643023b52cc21998a6b3eff15a233c5e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Nov 2017 15:41:30 -0500 Subject: [PATCH 272/406] drm/amdgpu/gfx7: cache raster_config values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We did this for gfx6 and 8, but somehow missed gfx7. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5c8a7a48a4adb..419ba0ce7ee5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1819,6 +1819,22 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.backend_enable_mask, num_rb_pipes); } + + /* cache the values for userspace */ + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + adev->gfx.config.rb_config[i][j].rb_backend_disable = + RREG32(mmCC_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].user_rb_backend_disable = + RREG32(mmGC_USER_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].raster_config = + RREG32(mmPA_SC_RASTER_CONFIG); + adev->gfx.config.rb_config[i][j].raster_config_1 = + RREG32(mmPA_SC_RASTER_CONFIG_1); + } + } + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } From aca31681b1a594dd1a25a51ff2c58f4f4a165446 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Nov 2017 13:53:29 -0500 Subject: [PATCH 273/406] drm/amdgpu: used cached gca values for cik_read_register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the cached values has less latency for bare metal and prevents reading back bogus values if the engine is powergated. This was implemented for VI and SI, but somehow CIK got missed. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik.c | 111 ++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 793b1470284d6..a296f7bbe57cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1023,22 +1023,101 @@ static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {mmPA_SC_RASTER_CONFIG_1, true}, }; -static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, - u32 reg_offset) + +static uint32_t cik_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) { - uint32_t val; + if (indexed) { + uint32_t val; + unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num; + unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num; + + switch (reg_offset) { + case mmCC_RB_BACKEND_DISABLE: + return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable; + case mmGC_USER_RB_BACKEND_DISABLE: + return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable; + case mmPA_SC_RASTER_CONFIG: + return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config; + case mmPA_SC_RASTER_CONFIG_1: + return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1; + } - mutex_lock(&adev->grbm_idx_mutex); - if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); - val = RREG32(reg_offset); + val = RREG32(reg_offset); - if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - return val; + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + return val; + } else { + unsigned idx; + + switch (reg_offset) { + case mmGB_ADDR_CONFIG: + return adev->gfx.config.gb_addr_config; + case mmMC_ARB_RAMCFG: + return adev->gfx.config.mc_arb_ramcfg; + case mmGB_TILE_MODE0: + case mmGB_TILE_MODE1: + case mmGB_TILE_MODE2: + case mmGB_TILE_MODE3: + case mmGB_TILE_MODE4: + case mmGB_TILE_MODE5: + case mmGB_TILE_MODE6: + case mmGB_TILE_MODE7: + case mmGB_TILE_MODE8: + case mmGB_TILE_MODE9: + case mmGB_TILE_MODE10: + case mmGB_TILE_MODE11: + case mmGB_TILE_MODE12: + case mmGB_TILE_MODE13: + case mmGB_TILE_MODE14: + case mmGB_TILE_MODE15: + case mmGB_TILE_MODE16: + case mmGB_TILE_MODE17: + case mmGB_TILE_MODE18: + case mmGB_TILE_MODE19: + case mmGB_TILE_MODE20: + case mmGB_TILE_MODE21: + case mmGB_TILE_MODE22: + case mmGB_TILE_MODE23: + case mmGB_TILE_MODE24: + case mmGB_TILE_MODE25: + case mmGB_TILE_MODE26: + case mmGB_TILE_MODE27: + case mmGB_TILE_MODE28: + case mmGB_TILE_MODE29: + case mmGB_TILE_MODE30: + case mmGB_TILE_MODE31: + idx = (reg_offset - mmGB_TILE_MODE0); + return adev->gfx.config.tile_mode_array[idx]; + case mmGB_MACROTILE_MODE0: + case mmGB_MACROTILE_MODE1: + case mmGB_MACROTILE_MODE2: + case mmGB_MACROTILE_MODE3: + case mmGB_MACROTILE_MODE4: + case mmGB_MACROTILE_MODE5: + case mmGB_MACROTILE_MODE6: + case mmGB_MACROTILE_MODE7: + case mmGB_MACROTILE_MODE8: + case mmGB_MACROTILE_MODE9: + case mmGB_MACROTILE_MODE10: + case mmGB_MACROTILE_MODE11: + case mmGB_MACROTILE_MODE12: + case mmGB_MACROTILE_MODE13: + case mmGB_MACROTILE_MODE14: + case mmGB_MACROTILE_MODE15: + idx = (reg_offset - mmGB_MACROTILE_MODE0); + return adev->gfx.config.macrotile_mode_array[idx]; + default: + return RREG32(reg_offset); + } + } } static int cik_read_register(struct amdgpu_device *adev, u32 se_num, @@ -1048,13 +1127,13 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(cik_allowed_read_registers); i++) { + bool indexed = cik_allowed_read_registers[i].grbm_indexed; + if (reg_offset != cik_allowed_read_registers[i].reg_offset) continue; - *value = cik_allowed_read_registers[i].grbm_indexed ? - cik_read_indexed_register(adev, se_num, - sh_num, reg_offset) : - RREG32(reg_offset); + *value = cik_get_register_value(adev, indexed, se_num, sh_num, + reg_offset); return 0; } return -EINVAL; From b693fc1f83bc9aa5e86c87ac7da48870e45bf486 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Nov 2017 17:46:50 -0500 Subject: [PATCH 274/406] Revert "drm/amdgpu: fix rmmod KCQ disable failed error" This reverts commit 446947b44fb8cabc0213ff4efd706931e36b1963. this patch is incorrrect, amdgpu_ucode_bo_fini always called after gfx_hw_fini. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2c574374d9b68..3573ecdb06eef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1837,9 +1837,6 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) - amdgpu_ucode_fini_bo(adev); - for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 033fba2def6f7..5f5aa5fddc169 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -164,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_fini_bo(adev); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7714f4a6c8b00..447d446b50150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -442,6 +442,8 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + amdgpu_ucode_fini_bo(adev); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); From 89ce6e0afee8eafa679093207dabd717af9d09c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 22 Nov 2017 15:55:21 +0100 Subject: [PATCH 275/406] drm/amdgpu: Set adev->vcn.irq.num_types for VCN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were setting adev->uvd.irq.num_types instead. Fixes: 9b257116e784 ("drm/amdgpu: add vcn enc irq support") Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 1eb4d79d6e306..0450ac5ba6b6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1175,7 +1175,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 1; adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; } From fa7c7939b4bf112cd06ba166b739244077898990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 22 Nov 2017 15:55:21 +0100 Subject: [PATCH 276/406] drm/amdgpu: Use unsigned ring indices in amdgpu_queue_mgr_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This matches the corresponding UAPI fields. Treating the ring index as signed could result in accessing random unrelated memory if the MSB was set. Fixes: effd924d2f3b ("drm/amdgpu: untie user ring ids from kernel ring ids v6") Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c25cedff4915d..0b14b53737834 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -717,7 +717,7 @@ int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr); int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr, - int hw_ip, int instance, int ring, + u32 hw_ip, u32 instance, u32 ring, struct amdgpu_ring **out_ring); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 190e28cb827e5..93d86619e802c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -63,7 +63,7 @@ static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, static int amdgpu_identity_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - int ring, + u32 ring, struct amdgpu_ring **out_ring) { switch (mapper->hw_ip) { @@ -121,7 +121,7 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) static int amdgpu_lru_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - int user_ring, bool lru_pipe_order, + u32 user_ring, bool lru_pipe_order, struct amdgpu_ring **out_ring) { int r, i, j; @@ -208,7 +208,7 @@ int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, */ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr, - int hw_ip, int instance, int ring, + u32 hw_ip, u32 instance, u32 ring, struct amdgpu_ring **out_ring) { int r, ip_num_rings; From 6edc6910ba4cd6eab309263539c8f09b8ad772bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 24 Nov 2017 11:39:30 +0100 Subject: [PATCH 277/406] drm/amdgpu: don't try to move pinned BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Never try to move pinned BOs during CS. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a57cec737c18a..57abf7abd7a9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -409,6 +409,10 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, if (candidate->robj == validated) break; + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* Check if this BO is in one of the domains we need space for */ From acc34503bd22bd826bbf7b7f95c84a06bb6380e4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2017 14:50:01 -0400 Subject: [PATCH 278/406] drm/amdgpu: drop experimental flag for raven Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ec96bb1f9eafb..c2f414ffb2cc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -536,7 +536,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, /* Raven */ - {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0, 0, 0} }; From 1b6c80674192bd6b235de7bc58f0bd03eb7f89df Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 13 Oct 2017 12:13:02 -0400 Subject: [PATCH 279/406] drm/amd/display: Add null check for 24BPP (xfm and dpp) Fixes Nullptr error when trying 24BPP Signed-off-by: Bhawanpreet Lakha Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d1cdf9f8853d7..a2f9be3716cff 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -856,6 +856,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; + /* Taps calculations */ if (pipe_ctx->plane_res.xfm != NULL) res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( @@ -864,16 +865,21 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) if (pipe_ctx->plane_res.dpp != NULL) res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); - if (!res) { /* Try 24 bpp linebuffer */ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_24BPP; - res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( - pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); - - res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( - pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); + if (pipe_ctx->plane_res.xfm != NULL) + res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( + pipe_ctx->plane_res.xfm, + &pipe_ctx->plane_res.scl_data, + &plane_state->scaling_quality); + + if (pipe_ctx->plane_res.dpp != NULL) + res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( + pipe_ctx->plane_res.dpp, + &pipe_ctx->plane_res.scl_data, + &plane_state->scaling_quality); } if (res) From 827f11e97dbc591f0c9619151b9a89f082e8ecb8 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Tue, 17 Oct 2017 17:18:24 -0400 Subject: [PATCH 280/406] drm/amd/display: Should disable when new stream is null core_link_disable_stream should be called when the new stream is null (i.e. want to disable). Modify the if condition to reflect that. Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 1229a3315018e..be14a26650728 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1868,8 +1868,10 @@ static void dce110_reset_hw_ctx_wrap( pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { struct clock_source *old_clk = pipe_ctx_old->clock_source; - /* disable already, no need to disable again */ - if (pipe_ctx->stream && !pipe_ctx->stream->dpms_off) + /* Disable if new stream is null. O/w, if stream is + * disabled already, no need to disable again. + */ + if (!pipe_ctx->stream || !pipe_ctx->stream->dpms_off) core_link_disable_stream(pipe_ctx_old, FREE_ACQUIRED_RESOURCE); pipe_ctx_old->stream_res.tg->funcs->set_blank(pipe_ctx_old->stream_res.tg, true); From 6bffebc90c23e2341a1f8371e7b496ec94136e47 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 18 Oct 2017 20:22:40 -0400 Subject: [PATCH 281/406] drm/amd/display: Add timing validation against dongle cap For DP active dongles, the dpcd dongle caps are read but not used to validate mode timing. This addresses this. In particular, this change fixes light up on the HDMI 4k TV connected through DP active dongle. Since the 4k TV defaults to YCbCr420, which the dongle don't support. This change does not address MST cases, a more generalized approach must be taken for that. Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 + drivers/gpu/drm/amd/display/dc/core/dc_link.c | 70 ++++++++++++++++++- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 21 +++++- .../gpu/drm/amd/display/dc/inc/core_status.h | 2 +- 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index fe63f5894d43b..15625fd944559 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -128,6 +128,7 @@ static bool create_links( link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); + link->link_status.dpcd_caps = &link->dpcd_caps; enc_init.ctx = dc->ctx; enc_init.channel = CHANNEL_ID_UNKNOWN; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 0602610489d75..73077869151cf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1801,12 +1801,75 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal, link); } +bool dp_active_dongle_validate_timing( + const struct dc_crtc_timing *timing, + const struct dc_dongle_caps *dongle_caps) +{ + unsigned int required_pix_clk = timing->pix_clk_khz; + + if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER || + dongle_caps->extendedCapValid == false) + return true; + + /* Check Pixel Encoding */ + switch (timing->pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + break; + case PIXEL_ENCODING_YCBCR422: + if (!dongle_caps->is_dp_hdmi_ycbcr422_pass_through) + return false; + break; + case PIXEL_ENCODING_YCBCR420: + if (!dongle_caps->is_dp_hdmi_ycbcr420_pass_through) + return false; + break; + default: + /* Invalid Pixel Encoding*/ + return false; + } + + + /* Check Color Depth and Pixel Clock */ + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + required_pix_clk /= 2; + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + case COLOR_DEPTH_888: + /*888 and 666 should always be supported*/ + break; + case COLOR_DEPTH_101010: + if (dongle_caps->dp_hdmi_max_bpc < 10) + return false; + required_pix_clk = required_pix_clk * 10 / 8; + break; + case COLOR_DEPTH_121212: + if (dongle_caps->dp_hdmi_max_bpc < 12) + return false; + required_pix_clk = required_pix_clk * 12 / 8; + break; + + case COLOR_DEPTH_141414: + case COLOR_DEPTH_161616: + default: + /* These color depths are currently not supported */ + return false; + } + + if (required_pix_clk > dongle_caps->dp_hdmi_max_pixel_clk) + return false; + + return true; +} + enum dc_status dc_link_validate_mode_timing( const struct dc_stream_state *stream, struct dc_link *link, const struct dc_crtc_timing *timing) { uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk; + struct dc_dongle_caps *dongle_caps = &link->link_status.dpcd_caps->dongle_caps; /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle @@ -1814,8 +1877,13 @@ enum dc_status dc_link_validate_mode_timing( if (link->remote_sinks[0]) return DC_OK; + /* Passive Dongle */ if (0 != max_pix_clk && timing->pix_clk_khz > max_pix_clk) - return DC_EXCEED_DONGLE_MAX_CLK; + return DC_EXCEED_DONGLE_CAP; + + /* Active Dongle*/ + if (!dp_active_dongle_validate_timing(timing, dongle_caps)) + return DC_EXCEED_DONGLE_CAP; switch (stream->signal) { case SIGNAL_TYPE_EDP: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index ced42484dcfc7..8e97b42a03a27 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2062,6 +2062,24 @@ bool is_dp_active_dongle(const struct dc_link *link) (dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER); } +static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc) +{ + switch (bpc) { + case DOWN_STREAM_MAX_8BPC: + return 8; + case DOWN_STREAM_MAX_10BPC: + return 10; + case DOWN_STREAM_MAX_12BPC: + return 12; + case DOWN_STREAM_MAX_16BPC: + return 16; + default: + break; + } + + return -1; +} + static void get_active_converter_info( uint8_t data, struct dc_link *link) { @@ -2131,7 +2149,8 @@ static void get_active_converter_info( hdmi_caps.bits.YCrCr420_CONVERSION; link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc = - hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT; + translate_dpcd_max_bpc( + hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); link->dpcd_caps.dongle_caps.extendedCapValid = true; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index 01df85641684f..94fc31080fdad 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -38,7 +38,7 @@ enum dc_status { DC_FAIL_DETACH_SURFACES = 8, DC_FAIL_SURFACE_VALIDATE = 9, DC_NO_DP_LINK_BANDWIDTH = 10, - DC_EXCEED_DONGLE_MAX_CLK = 11, + DC_EXCEED_DONGLE_CAP = 11, DC_SURFACE_PIXEL_FORMAT_UNSUPPORTED = 12, DC_FAIL_BANDWIDTH_VALIDATE = 13, /* BW and Watermark validation */ DC_FAIL_SCALING = 14, From 3eb4eba42263cc1e810d79b4970cbcb096c210ab Mon Sep 17 00:00:00 2001 From: Roman Li Date: Fri, 20 Oct 2017 10:15:18 -0400 Subject: [PATCH 282/406] drm/amd/display: Fix S3 topology change Clean fake sink flag on resume if real sink connected. Fixing S3 topology change problem like this: 1) x desktop with 1 or > displays 2) unplug display 3) suspend 4) replug same display 5) resume without this change replugged display doesn't light up Signed-off-by: Roman Li Reviewed-by: Sun peng Li Acked-by: Harry Wentland Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 889ed24084e86..009aaeb263e3a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -677,6 +677,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) mutex_lock(&aconnector->hpd_lock); dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + + if (aconnector->fake_enable && aconnector->dc_link->local_sink) + aconnector->fake_enable = false; + aconnector->dc_sink = NULL; amdgpu_dm_update_connector_after_detect(aconnector); mutex_unlock(&aconnector->hpd_lock); From 4ddd76d1ce00bb0d78e73e29fd9062ecd6bad611 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 23 Oct 2017 15:37:37 -0400 Subject: [PATCH 283/406] drm/amd/display: fix split recout calculation Recout split rounding code was wrong Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a2f9be3716cff..ced339a145c60 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -580,14 +580,12 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { - pipe_ctx->plane_res.scl_data.recout.height /= 2; - pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; /* Floor primary pipe, ceil 2ndary pipe */ - pipe_ctx->plane_res.scl_data.recout.height += pipe_ctx->plane_res.scl_data.recout.height % 2; + pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; + pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; } else { - pipe_ctx->plane_res.scl_data.recout.width /= 2; + pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width; - pipe_ctx->plane_res.scl_data.recout.width += pipe_ctx->plane_res.scl_data.recout.width % 2; } } else if (pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) { From 58fe8990fc29336c9ea6531d9ecc44466d3b9221 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Mon, 23 Oct 2017 09:11:46 -0400 Subject: [PATCH 284/406] drm/amd/display: Handle as MST first and then DP dongle if sink support both Signed-off-by: Hersen Wu Reviewed-by: Tony Cheng Reviewed-by: Wenjing Liu Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 73077869151cf..e27ed4a452652 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -480,22 +480,6 @@ static void detect_dp( sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; detect_dp_sink_caps(link); - /* DP active dongles */ - if (is_dp_active_dongle(link)) { - link->type = dc_connection_active_dongle; - if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) { - /* - * active dongle unplug processing for short irq - */ - link_disconnect_sink(link); - return; - } - - if (link->dpcd_caps.dongle_type != - DISPLAY_DONGLE_DP_HDMI_CONVERTER) { - *converter_disable_audio = true; - } - } if (is_mst_supported(link)) { sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT_MST; link->type = dc_connection_mst_branch; @@ -535,6 +519,22 @@ static void detect_dp( sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; } } + + if (link->type != dc_connection_mst_branch && + is_dp_active_dongle(link)) { + /* DP active dongles */ + link->type = dc_connection_active_dongle; + if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) { + /* + * active dongle unplug processing for short irq + */ + link_disconnect_sink(link); + return; + } + + if (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER) + *converter_disable_audio = true; + } } else { /* DP passive dongles */ sink_caps->signal = dp_passive_dongle_detection(link->ddc, From 1c72be981e8a18b4b4ad9d7fc4620bd19c2aba7f Mon Sep 17 00:00:00 2001 From: Andrew Jiang Date: Tue, 24 Oct 2017 12:00:55 -0400 Subject: [PATCH 285/406] drm/amd/display: Don't reject 3D timings Signed-off-by: Andrew Jiang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c index c7333cdf18021..fced178c8c794 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c @@ -496,9 +496,6 @@ static bool tgn10_validate_timing( timing->timing_3d_format != TIMING_3D_FORMAT_INBAND_FA) return false; - if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE && - tg->ctx->dc->debug.disable_stereo_support) - return false; /* Temporarily blocking interlacing mode until it's supported */ if (timing->flags.INTERLACE == 1) return false; From 116b2632ab033b2a755c1c7b703fede6860b3140 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 24 Oct 2017 17:57:38 -0400 Subject: [PATCH 286/406] drm/amd/display: fix split recout offset Previous recout calculation fix changed recout size rounding and affected the offset when it should not have Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ced339a145c60..1cd15d1cc3f41 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -580,12 +580,12 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { + pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height / 2; /* Floor primary pipe, ceil 2ndary pipe */ pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; - pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; } else { + pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width / 2; pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; - pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width; } } else if (pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) { From 30ec2b9717c1c6616332f8ce7c0cb3dd72032a2e Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Fri, 3 Nov 2017 16:04:34 -0400 Subject: [PATCH 287/406] drm/amd/display: Check aux channel before MST resume It is to fix: MST display failed to resume from S3 At the beginning of resume from S3, need to check if mgr->aux is NULL. Fake MST encoder doesn't have real aux channel. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Roman Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 009aaeb263e3a..5293604a894b7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -520,7 +520,8 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { aconnector = to_amdgpu_dm_connector(connector); - if (aconnector->dc_link->type == dc_connection_mst_branch) { + if (aconnector->dc_link->type == dc_connection_mst_branch && + aconnector->mst_mgr.aux) { DRM_DEBUG_DRIVER("DM_MST: starting TM on aconnector: %p [id: %d]\n", aconnector, aconnector->base.base.id); From 16fb754a294d25f4077e1f475d930c22698f442a Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Fri, 10 Nov 2017 13:53:53 -0500 Subject: [PATCH 288/406] drm/amd/display: fix split viewport rounding error Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ++++ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 10 ++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3dce35e66b091..983987f7c9835 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -890,6 +890,10 @@ bool dcn_validate_bandwidth( + pipe->bottom_pipe->plane_res.scl_data.recout.width; } + ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value + || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); + ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value + || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs( pipe->plane_state->format); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1cd15d1cc3f41..655e08df48a01 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -516,13 +516,11 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split; if (right_view) { - data->viewport.width /= 2; - data->viewport_c.width /= 2; - data->viewport.x += data->viewport.width; - data->viewport_c.x += data->viewport_c.width; + data->viewport.x += data->viewport.width / 2; + data->viewport_c.x += data->viewport_c.width / 2; /* Ceil offset pipe */ - data->viewport.width += data->viewport.width % 2; - data->viewport_c.width += data->viewport_c.width % 2; + data->viewport.width = (data->viewport.width + 1) / 2; + data->viewport_c.width = (data->viewport_c.width + 1) / 2; } else { data->viewport.width /= 2; data->viewport_c.width /= 2; From 1b61973f9e0e7724bdc779ef3f9b55bd21b08db4 Mon Sep 17 00:00:00 2001 From: Jordan Lazare Date: Mon, 13 Nov 2017 17:57:33 -0400 Subject: [PATCH 289/406] drm/amd/display: Revert noisy assert messages This partially reverts commit 4fb48bb66211 ("dc: fix split viewport rounding error"). Signed-off-by: Jordan Lazare Reviewed-by: Dmytro Laktyushkin Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 983987f7c9835..3dce35e66b091 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -890,10 +890,6 @@ bool dcn_validate_bandwidth( + pipe->bottom_pipe->plane_res.scl_data.recout.width; } - ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value - || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); - ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value - || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs( pipe->plane_state->format); From 8ffca5dca093cdd25264e782cc0c4539cb318925 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Fri, 10 Nov 2017 15:02:19 -0500 Subject: [PATCH 290/406] drm/amd/display: Do DC mode-change check when adding CRTCs Within atomic check, dm_update_crtcs_state is called twice. First to remove from the dc_state, and subsequently to add to it. In both calls, a secondary mode-change check is done using dc-level states. We shouldn't be doing this while removing, since a new dc_stream_state has not been created to do the necessary comparison. Because of this, the mode_changed flag within the DRM state can be mistakenly set to false. Doing so only when adding prevents this. We are also guaranteed that a call to add will come after remove, or else the atomic check fails (and a commit will not happen). Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5293604a894b7..8b0fb25d84a6e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4460,7 +4460,7 @@ static int dm_update_crtcs_state(struct dc *dc, } } - if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && + if (enable && dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { new_crtc_state->mode_changed = false; From 93984bbc70b3f321b8f6a3ec303e31b084e54230 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Mon, 20 Nov 2017 10:37:08 +0530 Subject: [PATCH 291/406] drm/amd/display: check plane state before validating fbc While validation fbc, array_mode of the pipe is accessed without checking plane_state exists for it. Causing to null pointer dereferencing followed by reboot when a crtc associated with external display(not connected) is page flipped. This patch adds a check for plane_state before using it to validate fbc. Signed-off-by: Shirish S Reviewed-by: Roman Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index be14a26650728..c3de7f305cd31 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1774,6 +1774,10 @@ static enum dc_status validate_fbc(struct dc *dc, if (pipe_ctx->stream->sink->link->psr_enabled) return DC_ERROR_UNEXPECTED; + /* Nothing to compress */ + if (!pipe_ctx->plane_state) + return DC_ERROR_UNEXPECTED; + /* Only for non-linear tiling */ if (pipe_ctx->plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) return DC_ERROR_UNEXPECTED; From 2f2c3b36fc5a3dc9fd2cf22a2f368502a70eea5d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Nov 2017 16:47:35 +0000 Subject: [PATCH 292/406] drm/amd/display: fix memory leaks on error exit return Currently in the case where some of the allocations fail for dce110_tgv, dce110_xfmv, dce110_miv or dce110_oppv then the exit return path ends up leaking allocated objects. Fix this by kfree'ing them before returning. Also re-work the comparison of the null pointers to use the !ptr idiom. Detected by CoverityScan, CID#1460246, 1460325, 1460324, 1460392 ("Resource Leak") Fixes: c4562236b3bc ("drm/amd/dc: Add dc display driver (v2)") Signed-off-by: Colin Ian King Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce110/dce110_resource.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index db96d2b47ff16..61adb8174ce09 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1037,11 +1037,13 @@ static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) struct dce110_opp *dce110_oppv = kzalloc(sizeof(*dce110_oppv), GFP_KERNEL); - if ((dce110_tgv == NULL) || - (dce110_xfmv == NULL) || - (dce110_miv == NULL) || - (dce110_oppv == NULL)) - return false; + if (!dce110_tgv || !dce110_xfmv || !dce110_miv || !dce110_oppv) { + kfree(dce110_tgv); + kfree(dce110_xfmv); + kfree(dce110_miv); + kfree(dce110_oppv); + return false; + } dce110_opp_v_construct(dce110_oppv, ctx); From 2b7c97d687e81db07d8c67b32ff920e7bf59444e Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 15 Nov 2017 18:27:31 -0500 Subject: [PATCH 293/406] drm/amd/display: fix seq issue: turn on clock before programming afmt. Signed-off-by: Charlene Liu Reviewed-by: Krunoslav Kovac Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../amd/display/dc/dce/dce_stream_encoder.c | 5 +++++ .../display/dc/dce110/dce110_hw_sequencer.c | 22 +++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 4fd49a16c3b6e..c04d67db9cea8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -87,6 +87,11 @@ static void dce110_update_generic_info_packet( */ uint32_t max_retries = 50; + REG_GET(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, ®val); + /*we need turn on clock before programming AFMT block*/ + if (regval != 1) + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + if (REG(AFMT_VBI_PACKET_CONTROL1)) { if (packet_index >= 8) ASSERT(0); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index c3de7f305cd31..07ff8d2faf3f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -991,6 +991,16 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) struct dc_link *link = stream->sink->link; struct dc *dc = pipe_ctx->stream->ctx->dc; + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( + pipe_ctx->stream_res.stream_enc); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( + pipe_ctx->stream_res.stream_enc); + + pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( + pipe_ctx->stream_res.stream_enc, true); if (pipe_ctx->stream_res.audio) { pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); @@ -1015,18 +1025,6 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) */ } - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( - pipe_ctx->stream_res.stream_enc); - - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( - pipe_ctx->stream_res.stream_enc); - - pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.stream_enc, true); - - /* blank at encoder level */ if (dc_is_dp_signal(pipe_ctx->stream->signal)) { if (pipe_ctx->stream->sink->link->connector_signal == SIGNAL_TYPE_EDP) From cfb071f7a9673109415d097125b3c12c16836acc Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 15 Nov 2017 18:55:57 -0500 Subject: [PATCH 294/406] drm/amd/display: try to find matching audio inst for enc inst first [Description] in eDP+ HDMI/DP clone or extended configuration, audio inst changed from inst 1 to inst0. No failure related this though, just playback device endpoint inst changed. Also remove one addition register read. Signed-off-by: Charlene Liu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 9 +++++++-- drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c | 4 +--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 655e08df48a01..546fe99d34f25 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1449,11 +1449,16 @@ static struct stream_encoder *find_first_free_match_stream_enc_for_link( static struct audio *find_first_free_audio( struct resource_context *res_ctx, - const struct resource_pool *pool) + const struct resource_pool *pool, + enum engine_id id) { int i; for (i = 0; i < pool->audio_count; i++) { if ((res_ctx->is_audio_acquired[i] == false) && (res_ctx->is_stream_enc_acquired[i] == true)) { + /*we have enough audio endpoint, find the matching inst*/ + if (id != i) + continue; + return pool->audios[i]; } } @@ -1702,7 +1707,7 @@ enum dc_status resource_map_pool_resources( dc_is_audio_capable_signal(pipe_ctx->stream->signal) && stream->audio_info.mode_count) { pipe_ctx->stream_res.audio = find_first_free_audio( - &context->res_ctx, pool); + &context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id); /* * Audio assigned in order first come first get. diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index c04d67db9cea8..e42b6eb1c1f0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -87,10 +87,8 @@ static void dce110_update_generic_info_packet( */ uint32_t max_retries = 50; - REG_GET(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, ®val); /*we need turn on clock before programming AFMT block*/ - if (regval != 1) - REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); if (REG(AFMT_VBI_PACKET_CONTROL1)) { if (packet_index >= 8) From 70e8ffc55b98f31af700eae525fef5d0b8fcb6e1 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 11:19:02 -0500 Subject: [PATCH 295/406] drm/amd/display: Fix amdgpu_dm bugs found by smatch drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:2760 create_eml_sink() warn: variable dereferenced before check 'aconnector->base.edid_blob_ptr' (see line 2758) drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4270 amdgpu_dm_atomic_commit_tail() warn: variable dereferenced before check 'dm_new_crtc_state->stream' (see line 4266) drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4417 dm_restore_drm_connector_state() warn: variable dereferenced before check 'disconnected_acrtc' (see line 4415) Signed-off-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8b0fb25d84a6e..5025dafb0517a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2709,7 +2709,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; - struct edid *edid = (struct edid *) aconnector->base.edid_blob_ptr->data; + struct edid *edid; if (!aconnector->base.edid_blob_ptr || !aconnector->base.edid_blob_ptr->data) { @@ -2721,6 +2721,8 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) return; } + edid = (struct edid *) aconnector->base.edid_blob_ptr->data; + aconnector->edid = edid; aconnector->dc_em_sink = dc_link_add_remote_sink( @@ -4198,13 +4200,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode, dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream); + if (!dm_new_crtc_state->stream) + continue; + status = dc_stream_get_status(dm_new_crtc_state->stream); WARN_ON(!status); WARN_ON(!status->plane_count); - if (!dm_new_crtc_state->stream) - continue; - /*TODO How it works with MPO ?*/ if (!dc_commit_planes_to_stream( dm->dc, @@ -4337,9 +4339,11 @@ void dm_restore_drm_connector_state(struct drm_device *dev, return; disconnected_acrtc = to_amdgpu_crtc(connector->encoder->crtc); - acrtc_state = to_dm_crtc_state(disconnected_acrtc->base.state); + if (!disconnected_acrtc) + return; - if (!disconnected_acrtc || !acrtc_state->stream) + acrtc_state = to_dm_crtc_state(disconnected_acrtc->base.state); + if (!acrtc_state->stream) return; /* From a8f9764731968d6a63f6f98c5b0d4e7a0e4154e2 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 11:56:15 -0500 Subject: [PATCH 296/406] drm/amd/display: Bunch of smatch error and warning fixes in DC drivers/gpu/drm/amd/amdgpu/../display/dc/basics/log_helpers.c:79 dc_conn_log() error: buffer overflow 'signal_type_info_tbl' 10 <= 10 drivers/gpu/drm/amd/amdgpu/../display/dc/bios/bios_parser.c:266 bios_parser_get_dst_obj() error: uninitialized symbol 'id'. drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_audio.c:357 dce_aud_az_enable() warn: inconsistent indenting drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:958 dcn10_acquire_idle_pipe_for_layer() error: we previously assumed 'head_pipe' could be null (see line 952) Signed-off-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/basics/log_helpers.c | 5 +++++ drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dce_audio.c | 10 +++++----- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c index 785b943b60ed1..6e43168fbdd65 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c @@ -75,6 +75,9 @@ void dc_conn_log(struct dc_context *ctx, if (signal == signal_type_info_tbl[i].type) break; + if (i == NUM_ELEMENTS(signal_type_info_tbl)) + goto fail; + dm_logger_append(&entry, "[%s][ConnIdx:%d] ", signal_type_info_tbl[i].name, link->link_index); @@ -96,6 +99,8 @@ void dc_conn_log(struct dc_context *ctx, dm_logger_append(&entry, "^\n"); dm_helpers_dc_conn_log(ctx, &entry, event); + +fail: dm_logger_close(&entry); va_end(args); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index aaaebd06d7ee3..86e6438c5cf35 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -249,7 +249,7 @@ static enum bp_result bios_parser_get_dst_obj(struct dc_bios *dcb, struct graphics_object_id *dest_object_id) { uint32_t number; - uint16_t *id; + uint16_t *id = NULL; ATOM_OBJECT *object; struct bios_parser *bp = BP_FROM_DCB(dcb); @@ -260,7 +260,7 @@ static enum bp_result bios_parser_get_dst_obj(struct dc_bios *dcb, number = get_dest_obj_list(bp, object, &id); - if (number <= index) + if (number <= index || !id) return BP_RESULT_BADINPUT; *dest_object_id = object_id_from_bios_object_id(id[index]); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 81c40f8864db2..0df9ecb2710c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -352,11 +352,11 @@ void dce_aud_az_enable(struct audio *audio) uint32_t value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); set_reg_field_value(value, 1, - AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - CLOCK_GATING_DISABLE); - set_reg_field_value(value, 1, - AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - AUDIO_ENABLED); + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + CLOCK_GATING_DISABLE); + set_reg_field_value(value, 1, + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + AUDIO_ENABLED); AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 4c4bd72d4e405..9fc8f827f2a18 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -912,11 +912,13 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( struct pipe_ctx *head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream); struct pipe_ctx *idle_pipe = find_idle_secondary_pipe(res_ctx, pool); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!idle_pipe) - return false; + return NULL; idle_pipe->stream = head_pipe->stream; idle_pipe->stream_res.tg = head_pipe->stream_res.tg; From b001965d4eb5e4c0e3de7dc6dce1d45ca25232a7 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 12:11:44 -0500 Subject: [PATCH 297/406] drm/amd/display: Fix use before NULL check in validate_timing Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dce110/dce110_timing_generator.c:1124 dce110_timing_generator_validate_timing() warn: variable dereferenced before check 'timing' (see line 1116) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dce110/dce110_timing_generator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 67ac737eaa7e9..4befce6cd87a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1112,10 +1112,7 @@ bool dce110_timing_generator_validate_timing( enum signal_type signal) { uint32_t h_blank; - uint32_t h_back_porch; - uint32_t hsync_offset = timing->h_border_right + - timing->h_front_porch; - uint32_t h_sync_start = timing->h_addressable + hsync_offset; + uint32_t h_back_porch, hsync_offset, h_sync_start; struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); @@ -1124,6 +1121,9 @@ bool dce110_timing_generator_validate_timing( if (!timing) return false; + hsync_offset = timing->h_border_right + timing->h_front_porch; + h_sync_start = timing->h_addressable + hsync_offset; + /* Currently we don't support 3D, so block all 3D timings */ if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE) return false; From b3fb2b4e21a995c4fa511627088bd55b88f6be11 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 19:49:44 -0500 Subject: [PATCH 298/406] drm/amd/display: Fix hubp check in set_cursor_position Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c:298 dc_stream_set_cursor_position() error: we previously assumed 'hubp' could be null (see line 294) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 9 ++++----- drivers/gpu/drm/amd/display/dc/inc/hw/transform.h | 7 ------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index b00a6040a6974..e230cc44a0a7d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -263,7 +263,6 @@ bool dc_stream_set_cursor_position( struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp; struct mem_input *mi = pipe_ctx->plane_res.mi; struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct transform *xfm = pipe_ctx->plane_res.xfm; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_cursor_position pos_cpy = *position; struct dc_cursor_mi_param param = { @@ -294,11 +293,11 @@ bool dc_stream_set_cursor_position( if (mi != NULL && mi->funcs->set_cursor_position != NULL) mi->funcs->set_cursor_position(mi, &pos_cpy, ¶m); - if (hubp != NULL && hubp->funcs->set_cursor_position != NULL) - hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); + if (!hubp) + continue; - if (xfm != NULL && xfm->funcs->set_cursor_position != NULL) - xfm->funcs->set_cursor_position(xfm, &pos_cpy, ¶m, hubp->curs_attr.width); + if (hubp->funcs->set_cursor_position != NULL) + hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); if (dpp != NULL && dpp->funcs->set_cursor_position != NULL) dpp->funcs->set_cursor_position(dpp, &pos_cpy, ¶m, hubp->curs_attr.width); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 7c08bc62c1f53..ea88997e1bbd8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -259,13 +259,6 @@ struct transform_funcs { struct transform *xfm_base, const struct dc_cursor_attributes *attr); - void (*set_cursor_position)( - struct transform *xfm_base, - const struct dc_cursor_position *pos, - const struct dc_cursor_mi_param *param, - uint32_t width - ); - }; const uint16_t *get_filter_2tap_16p(void); From edf38b58ecd847df75723fb90d4a1c3669b1e670 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 20:11:37 -0500 Subject: [PATCH 299/406] drm/amd/display: Fix potential NULL and mem leak in create_links Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:148 create_links() error: potential null dereference 'link->link_enc'. (kzalloc returns null) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 15625fd944559..7240db2e6f095 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -121,6 +121,10 @@ static bool create_links( goto failed_alloc; } + link->link_index = dc->link_count; + dc->links[dc->link_count] = link; + dc->link_count++; + link->ctx = dc->ctx; link->dc = dc; link->connector_signal = SIGNAL_TYPE_VIRTUAL; @@ -128,6 +132,12 @@ static bool create_links( link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); + + if (!link->link_enc) { + BREAK_TO_DEBUGGER(); + goto failed_alloc; + } + link->link_status.dpcd_caps = &link->dpcd_caps; enc_init.ctx = dc->ctx; @@ -139,10 +149,6 @@ static bool create_links( enc_init.encoder.id = ENCODER_ID_INTERNAL_VIRTUAL; enc_init.encoder.enum_id = ENUM_ID_1; virtual_link_encoder_construct(link->link_enc, &enc_init); - - link->link_index = dc->link_count; - dc->links[dc->link_count] = link; - dc->link_count++; } return true; From e41ab0309a1b75d31c360d1d2b0de58e8d7958ad Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 20:35:27 -0500 Subject: [PATCH 300/406] drm/amd/display: Fix couple more inconsistent NULL checks in dc_resource Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:1001 acquire_free_pipe_for_stream() error: we previously assumed 'head_pipe' could be null (see line 998) drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:1808 dc_validate_global_state() error: we previously assumed 'new_ctx' could be null (see line 1778) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 546fe99d34f25..b7422d3b71efb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -993,8 +993,10 @@ static struct pipe_ctx *acquire_free_pipe_for_stream( head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!head_pipe->plane_state) return head_pipe; @@ -1772,13 +1774,16 @@ enum dc_status dc_validate_global_state( enum dc_status result = DC_ERROR_UNEXPECTED; int i, j; + if (!new_ctx) + return DC_ERROR_UNEXPECTED; + if (dc->res_pool->funcs->validate_global) { result = dc->res_pool->funcs->validate_global(dc, new_ctx); if (result != DC_OK) return result; } - for (i = 0; new_ctx && i < new_ctx->stream_count; i++) { + for (i = 0; i < new_ctx->stream_count; i++) { struct dc_stream_state *stream = new_ctx->streams[i]; for (j = 0; j < dc->res_pool->pipe_count; j++) { From 0a24bfcb2cc2cd161a0c8aae8fcbb58239d5da2b Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Thu, 16 Nov 2017 15:17:27 -0500 Subject: [PATCH 301/406] drm/amd/display: Do not put drm_atomic_state on resume drm_atomic_helper_resume now puts it for us. See relevant patch here: https://lists.freedesktop.org/archives/dri-devel/2017-October/154268.html Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5025dafb0517a..3d6fbc38d95f7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -716,7 +716,6 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) ret = drm_atomic_helper_resume(ddev, adev->dm.cached_state); - drm_atomic_state_put(adev->dm.cached_state); adev->dm.cached_state = NULL; amdgpu_dm_irq_resume_late(adev); From d5c9cb6e00047b194d54cadc1e91156f2875a3e5 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 16 Nov 2017 17:22:39 -0500 Subject: [PATCH 302/406] drm/amd/display: fix gamma setting Adding gamma changed check as condition for affected plane. We ignored adding plane as affected if modeset was not required. But for color management change we still need it. Signed-off-by: Roman Li Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3d6fbc38d95f7..8d94dfc7ba3f4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4717,7 +4717,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } } else { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) + if (!drm_atomic_crtc_needs_modeset(new_crtc_state) && + !new_crtc_state->color_mgmt_changed) continue; if (!new_crtc_state->enable) From 320a127437e5d3cbb7fc444f8769eb510d11d3b9 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 14 Nov 2017 20:45:52 -0500 Subject: [PATCH 303/406] drm/amd/display: Switch to drm_atomic_helper_wait_for_flip_done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new helper function is advised to be used for drviers that use the nonblocking commit tracking support instead of drm_atomic_helper_wait_for_vblanks. Signed-off-by: Andrey Grodzovsky Reviewed-by: Harry Wentland Reviewed-and-Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8d94dfc7ba3f4..f71fe6d2ddda7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4259,7 +4259,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_commit_hw_done(state); if (wait_for_vblank) - drm_atomic_helper_wait_for_vblanks(dev, state); + drm_atomic_helper_wait_for_flip_done(dev, state); drm_atomic_helper_cleanup_planes(dev, state); } From 97011249c3b2ba6b038a2af7025063d62d1448ec Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Mon, 20 Nov 2017 12:45:54 -0500 Subject: [PATCH 304/406] drm/amd/display: USB-C / thunderbolt dock specific workaround reading dpcd 0x600 cause link loss for a particular USB-C dock with thurderbolt. workaround by avoiding dcpd 0x600 read unless it's necessary. Signed-off-by: Hersen Wu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 85 +++++++++---------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 8e97b42a03a27..e6bf05d76a942 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1512,7 +1512,7 @@ static bool hpd_rx_irq_check_link_loss_status( struct dc_link *link, union hpd_irq_data *hpd_irq_dpcd_data) { - uint8_t irq_reg_rx_power_state; + uint8_t irq_reg_rx_power_state = 0; enum dc_status dpcd_result = DC_ERROR_UNEXPECTED; union lane_status lane_status; uint32_t lane; @@ -1524,60 +1524,55 @@ static bool hpd_rx_irq_check_link_loss_status( if (link->cur_link_settings.lane_count == 0) return return_code; - /*1. Check that we can handle interrupt: Not in FS DOS, - * Not in "Display Timeout" state, Link is trained. - */ - dpcd_result = core_link_read_dpcd(link, - DP_SET_POWER, - &irq_reg_rx_power_state, - sizeof(irq_reg_rx_power_state)); + /*1. Check that Link Status changed, before re-training.*/ - if (dpcd_result != DC_OK) { - irq_reg_rx_power_state = DP_SET_POWER_D0; - dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, - "%s: DPCD read failed to obtain power state.\n", - __func__); + /*parse lane status*/ + for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) { + /* check status of lanes 0,1 + * changed DpcdAddress_Lane01Status (0x202) + */ + lane_status.raw = get_nibble_at_index( + &hpd_irq_dpcd_data->bytes.lane01_status.raw, + lane); + + if (!lane_status.bits.CHANNEL_EQ_DONE_0 || + !lane_status.bits.CR_DONE_0 || + !lane_status.bits.SYMBOL_LOCKED_0) { + /* if one of the channel equalization, clock + * recovery or symbol lock is dropped + * consider it as (link has been + * dropped) dp sink status has changed + */ + sink_status_changed = true; + break; + } } - if (irq_reg_rx_power_state == DP_SET_POWER_D0) { + /* Check interlane align.*/ + if (sink_status_changed || + !hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) { - /*2. Check that Link Status changed, before re-training.*/ - - /*parse lane status*/ - for (lane = 0; - lane < link->cur_link_settings.lane_count; - lane++) { - - /* check status of lanes 0,1 - * changed DpcdAddress_Lane01Status (0x202)*/ - lane_status.raw = get_nibble_at_index( - &hpd_irq_dpcd_data->bytes.lane01_status.raw, - lane); - - if (!lane_status.bits.CHANNEL_EQ_DONE_0 || - !lane_status.bits.CR_DONE_0 || - !lane_status.bits.SYMBOL_LOCKED_0) { - /* if one of the channel equalization, clock - * recovery or symbol lock is dropped - * consider it as (link has been - * dropped) dp sink status has changed*/ - sink_status_changed = true; - break; - } + dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, + "%s: Link Status changed.\n", __func__); - } + return_code = true; - /* Check interlane align.*/ - if (sink_status_changed || - !hpd_irq_dpcd_data->bytes.lane_status_updated.bits. - INTERLANE_ALIGN_DONE) { + /*2. Check that we can handle interrupt: Not in FS DOS, + * Not in "Display Timeout" state, Link is trained. + */ + dpcd_result = core_link_read_dpcd(link, + DP_SET_POWER, + &irq_reg_rx_power_state, + sizeof(irq_reg_rx_power_state)); + if (dpcd_result != DC_OK) { dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, - "%s: Link Status changed.\n", + "%s: DPCD read failed to obtain power state.\n", __func__); - - return_code = true; + } else { + if (irq_reg_rx_power_state != DP_SET_POWER_D0) + return_code = false; } } From 553d8e8b107159088cc4e2855a2bd9a358365e3f Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 23 Nov 2017 10:55:24 +1100 Subject: [PATCH 305/406] docs: correct documentation for %pK Current documentation indicates that %pK prints a leading '0x'. This is not the case. Correct documentation for printk specifier %pK. Signed-off-by: Tobin C. Harding --- Documentation/printk-formats.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 361789df51ecf..71b62db7eca28 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -85,13 +85,12 @@ Examples:: printk("Faulted at %pS\n", (void *)regs->ip); printk(" %s%pB\n", (reliable ? "" : "? "), (void *)*stack); - Kernel Pointers =============== :: - %pK 0x01234567 or 0x0123456789abcdef + %pK 01234567 or 0123456789abcdef For printing kernel pointers which should be hidden from unprivileged users. The behaviour of ``%pK`` depends on the ``kptr_restrict sysctl`` - see From 57e734423adda83f3b05505875343284efe3b39c Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 23 Nov 2017 10:56:39 +1100 Subject: [PATCH 306/406] vsprintf: refactor %pK code out of pointer() Currently code to handle %pK is all within the switch statement in pointer(). This is the wrong level of abstraction. Each of the other switch clauses call a helper function, pK should do the same. Refactor code out of pointer() to new function restricted_pointer(). Signed-off-by: Tobin C. Harding --- lib/vsprintf.c | 97 ++++++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 1746bae94d416..8dc5cf85cef47 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1343,6 +1343,59 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } +int kptr_restrict __read_mostly; + +static noinline_for_stack +char *restricted_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + spec.base = 16; + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = 2 * sizeof(ptr); + spec.flags |= ZEROPAD; + } + + switch (kptr_restrict) { + case 0: + /* Always print %pK values */ + break; + case 1: { + const struct cred *cred; + + /* + * kptr_restrict==1 cannot be used in IRQ context + * because its test for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) + return string(buf, end, "pK-error", spec); + + /* + * Only print the real pointer value if the current + * process has CAP_SYSLOG and is running with the + * same credentials it started with. This is because + * access to files is checked at open() time, but %pK + * checks permission at read() time. We don't want to + * leak pointer values if a binary opens a file using + * %pK and then elevates privileges before reading it. + */ + cred = current_cred(); + if (!has_capability_noaudit(current, CAP_SYSLOG) || + !uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)) + ptr = NULL; + break; + } + case 2: + default: + /* Always print 0's for %pK */ + ptr = NULL; + break; + } + + return number(buf, end, (unsigned long)ptr, spec); +} + static noinline_for_stack char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt) { @@ -1591,8 +1644,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } -int kptr_restrict __read_mostly; - /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -1792,47 +1843,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': - switch (kptr_restrict) { - case 0: - /* Always print %pK values */ - break; - case 1: { - const struct cred *cred; - - /* - * kptr_restrict==1 cannot be used in IRQ context - * because its test for CAP_SYSLOG would be meaningless. - */ - if (in_irq() || in_serving_softirq() || in_nmi()) { - if (spec.field_width == -1) - spec.field_width = default_width; - return string(buf, end, "pK-error", spec); - } - - /* - * Only print the real pointer value if the current - * process has CAP_SYSLOG and is running with the - * same credentials it started with. This is because - * access to files is checked at open() time, but %pK - * checks permission at read() time. We don't want to - * leak pointer values if a binary opens a file using - * %pK and then elevates privileges before reading it. - */ - cred = current_cred(); - if (!has_capability_noaudit(current, CAP_SYSLOG) || - !uid_eq(cred->euid, cred->uid) || - !gid_eq(cred->egid, cred->gid)) - ptr = NULL; - break; - } - case 2: - default: - /* Always print 0's for %pK */ - ptr = NULL; - break; - } - break; - + return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, fmt); case 'a': From ad67b74d2469d9b82aaa572d76474c95bc484d57 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 1 Nov 2017 15:32:23 +1100 Subject: [PATCH 307/406] printk: hash addresses printed with %p Currently there exist approximately 14 000 places in the kernel where addresses are being printed using an unadorned %p. This potentially leaks sensitive information regarding the Kernel layout in memory. Many of these calls are stale, instead of fixing every call lets hash the address by default before printing. This will of course break some users, forcing code printing needed addresses to be updated. Code that _really_ needs the address will soon be able to use the new printk specifier %px to print the address. For what it's worth, usage of unadorned %p can be broken down as follows (thanks to Joe Perches). $ git grep -E '%p[^A-Za-z0-9]' | cut -f1 -d"/" | sort | uniq -c 1084 arch 20 block 10 crypto 32 Documentation 8121 drivers 1221 fs 143 include 101 kernel 69 lib 100 mm 1510 net 40 samples 7 scripts 11 security 166 sound 152 tools 2 virt Add function ptr_to_id() to map an address to a 32 bit unique identifier. Hash any unadorned usage of specifier %p and any malformed specifiers. Signed-off-by: Tobin C. Harding --- Documentation/printk-formats.txt | 12 +++- lib/test_printf.c | 108 ++++++++++++++++++++----------- lib/vsprintf.c | 81 +++++++++++++++++++++-- 3 files changed, 155 insertions(+), 46 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 71b62db7eca28..b4e668ac4fe33 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -5,7 +5,6 @@ How to get printk format specifiers right :Author: Randy Dunlap :Author: Andrew Murray - Integer types ============= @@ -45,6 +44,17 @@ return from vsnprintf. Raw pointer value SHOULD be printed with %p. The kernel supports the following extended format specifiers for pointer types: +Pointer Types +============= + +Pointers printed without a specifier extension (i.e unadorned %p) are +hashed to give a unique identifier without leaking kernel addresses to user +space. On 64 bit machines the first 32 bits are zeroed. + +:: + + %p abcdef12 or 00000000abcdef12 + Symbols/Function Pointers ========================= diff --git a/lib/test_printf.c b/lib/test_printf.c index 563f10e6876ae..71ebfa43ad05f 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -24,24 +24,6 @@ #define PAD_SIZE 16 #define FILL_CHAR '$' -#define PTR1 ((void*)0x01234567) -#define PTR2 ((void*)(long)(int)0xfedcba98) - -#if BITS_PER_LONG == 64 -#define PTR1_ZEROES "000000000" -#define PTR1_SPACES " " -#define PTR1_STR "1234567" -#define PTR2_STR "fffffffffedcba98" -#define PTR_WIDTH 16 -#else -#define PTR1_ZEROES "0" -#define PTR1_SPACES " " -#define PTR1_STR "1234567" -#define PTR2_STR "fedcba98" -#define PTR_WIDTH 8 -#endif -#define PTR_WIDTH_STR stringify(PTR_WIDTH) - static unsigned total_tests __initdata; static unsigned failed_tests __initdata; static char *test_buffer __initdata; @@ -217,30 +199,79 @@ test_string(void) test("a | | ", "%-3.s|%-3.0s|%-3.*s", "a", "b", 0, "c"); } +#define PLAIN_BUF_SIZE 64 /* leave some space so we don't oops */ + +#if BITS_PER_LONG == 64 + +#define PTR_WIDTH 16 +#define PTR ((void *)0xffff0123456789ab) +#define PTR_STR "ffff0123456789ab" +#define ZEROS "00000000" /* hex 32 zero bits */ + +static int __init +plain_format(void) +{ + char buf[PLAIN_BUF_SIZE]; + int nchars; + + nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); + + if (nchars != PTR_WIDTH || strncmp(buf, ZEROS, strlen(ZEROS)) != 0) + return -1; + + return 0; +} + +#else + +#define PTR_WIDTH 8 +#define PTR ((void *)0x456789ab) +#define PTR_STR "456789ab" + +static int __init +plain_format(void) +{ + /* Format is implicitly tested for 32 bit machines by plain_hash() */ + return 0; +} + +#endif /* BITS_PER_LONG == 64 */ + +static int __init +plain_hash(void) +{ + char buf[PLAIN_BUF_SIZE]; + int nchars; + + nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); + + if (nchars != PTR_WIDTH || strncmp(buf, PTR_STR, PTR_WIDTH) == 0) + return -1; + + return 0; +} + +/* + * We can't use test() to test %p because we don't know what output to expect + * after an address is hashed. + */ static void __init plain(void) { - test(PTR1_ZEROES PTR1_STR " " PTR2_STR, "%p %p", PTR1, PTR2); - /* - * The field width is overloaded for some %p extensions to - * pass another piece of information. For plain pointers, the - * behaviour is slightly odd: One cannot pass either the 0 - * flag nor a precision to %p without gcc complaining, and if - * one explicitly gives a field width, the number is no longer - * zero-padded. - */ - test("|" PTR1_STR PTR1_SPACES " | " PTR1_SPACES PTR1_STR "|", - "|%-*p|%*p|", PTR_WIDTH+2, PTR1, PTR_WIDTH+2, PTR1); - test("|" PTR2_STR " | " PTR2_STR "|", - "|%-*p|%*p|", PTR_WIDTH+2, PTR2, PTR_WIDTH+2, PTR2); + int err; - /* - * Unrecognized %p extensions are treated as plain %p, but the - * alphanumeric suffix is ignored (that is, does not occur in - * the output.) - */ - test("|"PTR1_ZEROES PTR1_STR"|", "|%p0y|", PTR1); - test("|"PTR2_STR"|", "|%p0y|", PTR2); + err = plain_hash(); + if (err) { + pr_warn("plain 'p' does not appear to be hashed\n"); + failed_tests++; + return; + } + + err = plain_format(); + if (err) { + pr_warn("hashing plain 'p' has unexpected format\n"); + failed_tests++; + } } static void __init @@ -251,6 +282,7 @@ symbol_ptr(void) static void __init kernel_ptr(void) { + /* We can't test this without access to kptr_restrict. */ } static void __init diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8dc5cf85cef47..d69452a0f2fa5 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #ifdef CONFIG_BLOCK #include #endif @@ -1644,6 +1646,73 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +static bool have_filled_random_ptr_key __read_mostly; +static siphash_key_t ptr_key __read_mostly; + +static void fill_random_ptr_key(struct random_ready_callback *unused) +{ + get_random_bytes(&ptr_key, sizeof(ptr_key)); + /* + * have_filled_random_ptr_key==true is dependent on get_random_bytes(). + * ptr_to_id() needs to see have_filled_random_ptr_key==true + * after get_random_bytes() returns. + */ + smp_mb(); + WRITE_ONCE(have_filled_random_ptr_key, true); +} + +static struct random_ready_callback random_ready = { + .func = fill_random_ptr_key +}; + +static int __init initialize_ptr_random(void) +{ + int ret = add_random_ready_callback(&random_ready); + + if (!ret) { + return 0; + } else if (ret == -EALREADY) { + fill_random_ptr_key(&random_ready); + return 0; + } + + return ret; +} +early_initcall(initialize_ptr_random); + +/* Maps a pointer to a 32 bit unique identifier. */ +static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) +{ + unsigned long hashval; + const int default_width = 2 * sizeof(ptr); + + if (unlikely(!have_filled_random_ptr_key)) { + spec.field_width = default_width; + /* string length must be less than default_width */ + return string(buf, end, "(ptrval)", spec); + } + +#ifdef CONFIG_64BIT + hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); + /* + * Mask off the first 32 bits, this makes explicit that we have + * modified the address (and 32 bits is plenty for a unique ID). + */ + hashval = hashval & 0xffffffff; +#else + hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key); +#endif + + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = default_width; + spec.flags |= ZEROPAD; + } + spec.base = 16; + + return number(buf, end, hashval, spec); +} + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -1754,6 +1823,9 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a * pointer to the real address. + * + * Note: The default behaviour (unadorned %p) is to hash the address, + * rendering it useful as a unique identifier. */ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, @@ -1869,14 +1941,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); } } - spec.flags |= SMALL; - if (spec.field_width == -1) { - spec.field_width = default_width; - spec.flags |= ZEROPAD; - } - spec.base = 16; - return number(buf, end, (unsigned long) ptr, spec); + /* default is to _not_ leak addresses, hash before printing */ + return ptr_to_id(buf, end, ptr, spec); } /* From 7b1924a1d930eb27fc79c4e4e2a6c1c970623e68 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 23 Nov 2017 10:59:45 +1100 Subject: [PATCH 308/406] vsprintf: add printk specifier %px printk specifier %p now hashes all addresses before printing. Sometimes we need to see the actual unmodified address. This can be achieved using %lx but then we face the risk that if in future we want to change the way the Kernel handles printing of pointers we will have to grep through the already existent 50 000 %lx call sites. Let's add specifier %px as a clear, opt-in, way to print a pointer and maintain some level of isolation from all the other hex integer output within the Kernel. Add printk specifier %px to print the actual unmodified address. Signed-off-by: Tobin C. Harding --- Documentation/printk-formats.txt | 18 +++++++++++++++++- lib/vsprintf.c | 18 ++++++++++++++++++ scripts/checkpatch.pl | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index b4e668ac4fe33..aa0a776c817a7 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -49,7 +49,8 @@ Pointer Types Pointers printed without a specifier extension (i.e unadorned %p) are hashed to give a unique identifier without leaking kernel addresses to user -space. On 64 bit machines the first 32 bits are zeroed. +space. On 64 bit machines the first 32 bits are zeroed. If you _really_ +want the address see %px below. :: @@ -106,6 +107,21 @@ For printing kernel pointers which should be hidden from unprivileged users. The behaviour of ``%pK`` depends on the ``kptr_restrict sysctl`` - see Documentation/sysctl/kernel.txt for more details. +Unmodified Addresses +==================== + +:: + + %px 01234567 or 0123456789abcdef + +For printing pointers when you _really_ want to print the address. Please +consider whether or not you are leaking sensitive information about the +Kernel layout in memory before printing pointers with %px. %px is +functionally equivalent to %lx. %px is preferred to %lx because it is more +uniquely grep'able. If, in the future, we need to modify the way the Kernel +handles printing pointers it will be nice to be able to find the call +sites. + Struct Resources ================ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d69452a0f2fa5..d960aead03368 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1646,6 +1646,20 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +static noinline_for_stack +char *pointer_string(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + spec.base = 16; + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = 2 * sizeof(ptr); + spec.flags |= ZEROPAD; + } + + return number(buf, end, (unsigned long int)ptr, spec); +} + static bool have_filled_random_ptr_key __read_mostly; static siphash_key_t ptr_key __read_mostly; @@ -1818,6 +1832,8 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) * c major compatible string * C full compatible string * + * - 'x' For printing the address. Equivalent to "%lx". + * * ** Please update also Documentation/printk-formats.txt when making changes ** * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 @@ -1940,6 +1956,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'F': return device_node_string(buf, end, ptr, spec, fmt + 1); } + case 'x': + return pointer_string(buf, end, ptr, spec); } /* default is to _not_ leak addresses, hash before printing */ diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 95cda3ecc66b8..040aa79e1d9d3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5753,7 +5753,7 @@ sub process { for (my $count = $linenr; $count <= $lc; $count++) { my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0)); $fmt =~ s/%%//g; - if ($fmt =~ /(\%[\*\d\.]*p(?![\WFfSsBKRraEhMmIiUDdgVCbGNO]).)/) { + if ($fmt =~ /(\%[\*\d\.]*p(?![\WFfSsBKRraEhMmIiUDdgVCbGNOx]).)/) { $bad_extension = $1; last; } From 6424f6bb432752c7eb90cbeeb1c31d6125bba39a Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 1 Nov 2017 15:32:22 +1100 Subject: [PATCH 309/406] kasan: use %px to print addresses instead of %p Pointers printed with %p are now hashed by default. Kasan needs the actual address. We can use the new printk specifier %px for this purpose. Use %px instead of %p to print addresses. Signed-off-by: Tobin C. Harding --- mm/kasan/report.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 6bcfb01ba0386..410c8235e6715 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -134,7 +134,7 @@ static void print_error_description(struct kasan_access_info *info) pr_err("BUG: KASAN: %s in %pS\n", bug_type, (void *)info->ip); - pr_err("%s of size %zu at addr %p by task %s/%d\n", + pr_err("%s of size %zu at addr %px by task %s/%d\n", info->is_write ? "Write" : "Read", info->access_size, info->access_addr, current->comm, task_pid_nr(current)); } @@ -206,7 +206,7 @@ static void describe_object_addr(struct kmem_cache *cache, void *object, const char *rel_type; int rel_bytes; - pr_err("The buggy address belongs to the object at %p\n" + pr_err("The buggy address belongs to the object at %px\n" " which belongs to the cache %s of size %d\n", object, cache->name, cache->object_size); @@ -225,7 +225,7 @@ static void describe_object_addr(struct kmem_cache *cache, void *object, } pr_err("The buggy address is located %d bytes %s of\n" - " %d-byte region [%p, %p)\n", + " %d-byte region [%px, %px)\n", rel_bytes, rel_type, cache->object_size, (void *)object_addr, (void *)(object_addr + cache->object_size)); } @@ -302,7 +302,7 @@ static void print_shadow_for_address(const void *addr) char shadow_buf[SHADOW_BYTES_PER_ROW]; snprintf(buffer, sizeof(buffer), - (i == 0) ? ">%p: " : " %p: ", kaddr); + (i == 0) ? ">%px: " : " %px: ", kaddr); /* * We should not pass a shadow pointer to generic * function, because generic functions may try to From aca7573fde95152378361cba734996b384f3b1d3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 28 Nov 2017 08:23:04 +0530 Subject: [PATCH 310/406] powerpc: Avoid signed to unsigned conversion in set_thread_tidr() There is an unsafe signed to unsigned conversion in set_thread_tidr() that may cause an error value to be assigned to SPRN_TIDR register and used as thread-id. The issue happens as assign_thread_tidr() returns an int and thread.tidr is an unsigned-long. So a negative error code returned from assign_thread_tidr() will fail the error check and gets assigned as tidr as a large positive value. To fix this the patch assigns the return value of assign_thread_tidr() to a temporary int and assigns it to thread.tidr iff its '> 0'. The patch shouldn't impact the calling convention of set_thread_tidr() i.e all -ve return-values are error codes and a return value of '0' indicates success. Fixes: ec233ede4c86("powerpc: Add support for setting SPRN_TIDR") Signed-off-by: Vaibhav Jain Reviewed-by: Christophe Lombard clombard@linux.vnet.ibm.com Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bfdd783e39166..d205b52e3850a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1569,16 +1569,19 @@ void arch_release_task_struct(struct task_struct *t) */ int set_thread_tidr(struct task_struct *t) { + int rc; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -EINVAL; if (t != current) return -EINVAL; - t->thread.tidr = assign_thread_tidr(); - if (t->thread.tidr < 0) - return t->thread.tidr; + rc = assign_thread_tidr(); + if (rc < 0) + return rc; + t->thread.tidr = rc; mtspr(SPRN_TIDR, t->thread.tidr); return 0; From 7e4d4233260be0611c7fbdb2730c12731c4b8dc0 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 24 Nov 2017 14:03:38 +0530 Subject: [PATCH 311/406] powerpc: Do not assign thread.tidr if already assigned If set_thread_tidr() is called twice for same task_struct then it will allocate a new tidr value to it leaving the previous value still dangling in the vas_thread_ida table. To fix this the patch changes set_thread_tidr() to check if a tidr value is already assigned to the task_struct and if yes then returns zero. Fixes: ec233ede4c86("powerpc: Add support for setting SPRN_TIDR") Signed-off-by: Vaibhav Jain Reviewed-by: Andrew Donnellan [mpe: Modify to return 0 in the success case, not the TID value] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d205b52e3850a..5acb5a176dbe5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1577,6 +1577,9 @@ int set_thread_tidr(struct task_struct *t) if (t != current) return -EINVAL; + if (t->thread.tidr) + return 0; + rc = assign_thread_tidr(); if (rc < 0) return rc; From 80a780a167d9267c72867b806142bd6ec69ba123 Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Tue, 28 Nov 2017 09:24:06 +0100 Subject: [PATCH 312/406] mmc: core: prepend 0x to pre_eol_info entry in sysfs The sysfs entry "pre_eol_info" was missing the 0x prefix to identify it as hex formatted. Fixes: 46bc5c408e4e ("mmc: core: Export device lifetime information through sysfs") Signed-off-by: Bastian Stender Cc: # v4.11+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a552f61060d21..b8259fcb4bda9 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -781,7 +781,7 @@ MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(prv, "0x%x\n", card->cid.prv); MMC_DEV_ATTR(rev, "0x%x\n", card->ext_csd.rev); -MMC_DEV_ATTR(pre_eol_info, "%02x\n", card->ext_csd.pre_eol_info); +MMC_DEV_ATTR(pre_eol_info, "0x%02x\n", card->ext_csd.pre_eol_info); MMC_DEV_ATTR(life_time, "0x%02x 0x%02x\n", card->ext_csd.device_life_time_est_typ_a, card->ext_csd.device_life_time_est_typ_b); From c892b0d81705c566f575e489efc3c50762db1bde Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Tue, 28 Nov 2017 09:24:07 +0100 Subject: [PATCH 313/406] mmc: core: prepend 0x to OCR entry in sysfs The sysfs entry "ocr" was missing the 0x prefix to identify it as hex formatted. Fixes: 5fb06af7a33b ("mmc: core: Extend sysfs with OCR register") Signed-off-by: Bastian Stender Cc: # v4.8+ [Ulf: Amended change to also cover SD-cards] Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- drivers/mmc/core/sd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index b8259fcb4bda9..d209fb4669790 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -791,7 +791,7 @@ MMC_DEV_ATTR(enhanced_area_offset, "%llu\n", MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size); MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult); MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); MMC_DEV_ATTR(cmdq_en, "%d\n", card->ext_csd.cmdq_en); static ssize_t mmc_fwrev_show(struct device *dev, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 45bf78f327163..62b84dd8f9fe3 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -675,7 +675,7 @@ MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); static ssize_t mmc_dsr_show(struct device *dev, From a76dfe350c1fcb466b18595e65b33fa6abf65472 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 29 Nov 2017 12:04:31 +0100 Subject: [PATCH 314/406] drm/atomic: make drm_atomic_helper_wait_for_vblanks more agressive drm_atomic_helper_setup_commit expects that flipping of previous commits has happened when it is called to set up a new commit. This can be violated by commits where userspace doesn't get a flip completion event to synchronize against i.e. legacy modesets and property changes. The expectation is that those are done by blocking commits, which wait for completion. Most drivers call drm_atomic_helper_wait_for_vblanks in the commit_tail to ensure completion, but the wait for next vblank might not actually happen if the commit didn't change any planes. Make the wait more agressive by also waiting if no planes changed. This is the minimal regression fix for the 4.15 kernel series. Long term drivers should switch away from drm_atomic_helper_wait_for_vblanks and use drm_atomic_helper_wait_for_flip_done instead. Fixes: de39bec1a0c4 ("drm/atomic: Remove waits in drm_atomic_helper_commit_cleanup_done, v2.") Signed-off-by: Lucas Stach Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171129110431.6300-1-l.stach@pengutronix.de --- drivers/gpu/drm/drm_atomic_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 71d712f1b56a2..b16f1d69a0bbf 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1225,7 +1225,7 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, return; for_each_oldnew_crtc_in_state(old_state, crtc, old_crtc_state, new_crtc_state, i) { - if (!new_crtc_state->active || !new_crtc_state->planes_changed) + if (!new_crtc_state->active) continue; ret = drm_crtc_vblank_get(crtc); From 5478e478eee3b096b8d998d4ed445da30da2dfbc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 Nov 2017 22:06:13 +0100 Subject: [PATCH 315/406] eeprom: at24: correctly set the size for at24mac402 There's an ilog2() expansion in AT24_DEVICE_MAGIC() which rounds down the actual size of EUI-48 byte array in at24mac402 eeproms to 4 from 6, making it impossible to read it all. Fix it by manually adjusting the value in probe(). This patch contains a temporary fix that is suitable for stable branches. Eventually we'll probably remove the call to ilog2() while converting the magic values to actual structs. Cc: stable@vger.kernel.org Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- drivers/misc/eeprom/at24.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index e0b4b36ef0105..783244b485ccd 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -730,6 +730,16 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_warn(&client->dev, "page_size looks suspicious (no power of 2)!\n"); + /* + * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while + * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4. + * + * Eventually we'll get rid of the magic values altoghether in favor of + * real structs, but for now just manually set the right size. + */ + if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4) + chip.byte_len = 6; + /* Use I2C operations unless we're stuck with SMBus extensions. */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { if (chip.flags & AT24_FLAG_ADDR16) From 01e4fab6c1131a5e4f12104e7134da701def0434 Mon Sep 17 00:00:00 2001 From: Hyong-Youb Kim Date: Wed, 29 Nov 2017 00:03:50 -0500 Subject: [PATCH 316/406] myri10ge: Update MAINTAINERS Change the maintainer to Chris Lee who has access to Myricom hardware and can test/review. Update the website URL. Signed-off-by: Hyong-Youb Kim Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76d..77d819b458a99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9331,9 +9331,9 @@ F: drivers/gpu/drm/mxsfb/ F: Documentation/devicetree/bindings/display/mxsfb-drm.txt MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE) -M: Hyong-Youb Kim +M: Chris Lee L: netdev@vger.kernel.org -W: https://www.myricom.com/support/downloads/myri10ge.html +W: https://www.cspi.com/ethernet-products/support/downloads/ S: Supported F: drivers/net/ethernet/myricom/myri10ge/ From 52dd06506e9bbc2a37b352df7dfc5468f8da21fd Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Nov 2017 12:16:03 +0100 Subject: [PATCH 317/406] drm/fb_helper: Disable all crtc's when initial setup fails. Some drivers like i915 start with crtc's enabled, but with deferred fbcon setup they were no longer disabled as part of fbdev setup. Headless units could no longer enter pc3 state because the crtc was still enabled. Fix this by calling restore_fbdev_mode when we would have called it otherwise once during initial fbdev setup. Signed-off-by: Maarten Lankhorst Fixes: ca91a2758fce ("drm/fb-helper: Support deferred setup") Cc: # v4.14+ Reported-by: Thomas Voegtle Tested-by: Thomas Voegtle Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171128111603.62757-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/drm_fb_helper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 07374008f146f..e561663344559 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1809,6 +1809,10 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, if (crtc_count == 0 || sizes.fb_width == -1 || sizes.fb_height == -1) { DRM_INFO("Cannot find any crtc or sizes\n"); + + /* First time: disable all crtc's.. */ + if (!fb_helper->deferred_setup && !READ_ONCE(fb_helper->dev->master)) + restore_fbdev_mode(fb_helper); return -EAGAIN; } From 15bfe05c8d6386f1a90e9340d15336e85e32aad6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Nov 2017 11:01:09 +0100 Subject: [PATCH 318/406] net: ethernet: xilinx: Mark XILINX_LL_TEMAC broken on 64-bit On 64-bit (e.g. powerpc64/allmodconfig): drivers/net/ethernet/xilinx/ll_temac_main.c: In function 'temac_start_xmit_done': drivers/net/ethernet/xilinx/ll_temac_main.c:633:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); ^ cdmac_bd.app4 is u32, so it is too small to hold a kernel pointer. Note that several other fields in struct cdmac_bd are also too small to hold physical addresses on 64-bit platforms. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 6d68c8a8f4f2a..da4ec575ccf9b 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -34,6 +34,7 @@ config XILINX_AXI_EMAC config XILINX_LL_TEMAC tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver" depends on (PPC || MICROBLAZE) + depends on !64BIT || BROKEN select PHYLIB ---help--- This driver supports the Xilinx 10/100/1000 LocalLink TEMAC From a7e4fbbfdf7935333800bd801f6affc2515506f2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 29 Nov 2017 14:11:49 +0000 Subject: [PATCH 319/406] net: via: via-rhine: use %p to format void * address instead of %x Don't use %x and casting to print out an address, instead use %p and remove the casting. Cleans up smatch warnings: drivers/net/ethernet/via/via-rhine.c:998 rhine_init_one_common() warn: argument 4 to %lx specifier is cast from pointer Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-rhine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 83e6f76eb9654..33949248c829e 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -995,8 +995,8 @@ static int rhine_init_one_common(struct device *hwdev, u32 quirks, else name = "Rhine III"; - netdev_info(dev, "VIA %s at 0x%lx, %pM, IRQ %d\n", - name, (long)ioaddr, dev->dev_addr, rp->irq); + netdev_info(dev, "VIA %s at %p, %pM, IRQ %d\n", + name, ioaddr, dev->dev_addr, rp->irq); dev_set_drvdata(hwdev, dev); From 644a1f19c6c8393d0c4168a5adf79056da6822eb Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 27 Nov 2017 20:46:22 +0100 Subject: [PATCH 320/406] eeprom: at24: fix reading from 24MAC402/24MAC602 Chip datasheet mentions that word addresses other than the actual start position of the MAC delivers undefined results. So fix this. Current implementation doesn't work due to this wrong offset. Cc: stable@vger.kernel.org Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 783244b485ccd..8ca6772b3baf3 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -425,7 +425,8 @@ static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf, memset(msg, 0, sizeof(msg)); msg[0].addr = client->addr; msg[0].buf = addrbuf; - addrbuf[0] = 0x90 + offset; + /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */ + addrbuf[0] = 0xa0 - at24->chip.byte_len + offset; msg[0].len = 1; msg[1].addr = client->addr; msg[1].flags = I2C_M_RD; From d9bcd462daf34aebb8de9ad7f76de0198bb5a0f0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 24 Nov 2017 07:47:50 +0100 Subject: [PATCH 321/406] eeprom: at24: check at24_read/write arguments So far we completely rely on the caller to provide valid arguments. To be on the safe side perform an own sanity check. Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 8ca6772b3baf3..305a7a464d091 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -569,6 +569,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return count; + if (off + count > at24->chip.byte_len) + return -EINVAL; + client = at24_translate_offset(at24, &off); ret = pm_runtime_get_sync(&client->dev); @@ -614,6 +617,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return -EINVAL; + if (off + count > at24->chip.byte_len) + return -EINVAL; + client = at24_translate_offset(at24, &off); ret = pm_runtime_get_sync(&client->dev); From f6454f80e8a965fca203dab28723f68ec78db608 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 29 Nov 2017 15:20:00 +0100 Subject: [PATCH 322/406] ethernet: dwmac-stm32: Fix copyright Uniformize STMicroelectronics copyrights header Signed-off-by: Benjamin Gaignard CC: Alexandre Torgue Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 61cb24810d101..9e6db16af663b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -1,8 +1,8 @@ /* * dwmac-stm32.c - DWMAC Specific Glue layer for STM32 MCU * - * Copyright (C) Alexandre Torgue 2015 - * Author: Alexandre Torgue + * Copyright (C) STMicroelectronics SA 2017 + * Author: Alexandre Torgue for STMicroelectronics. * License terms: GNU General Public License (GPL), version 2 * */ From 88bc0ede8d35edc969350852894dc864a2dc1859 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 29 Nov 2017 22:34:50 +0900 Subject: [PATCH 323/406] quota: Check for register_shrinker() failure. register_shrinker() might return -ENOMEM error since Linux 3.12. Call panic() as with other failure checks in this function if register_shrinker() failed. Fixes: 1d3d4437eae1 ("vmscan: per-node deferred work") Signed-off-by: Tetsuo Handa Cc: Jan Kara Cc: Michal Hocko Reviewed-by: Michal Hocko Signed-off-by: Jan Kara --- fs/quota/dquot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d38684ff2ebb8..020c597ef9b6e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2992,7 +2992,8 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - register_shrinker(&dqcache_shrinker); + if (register_shrinker(&dqcache_shrinker)) + panic("Cannot register dquot shrinker"); return 0; } From f55e1014f9e567d830eb3a7f57d879a34872af4b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Nov 2017 09:01:01 -0800 Subject: [PATCH 324/406] Revert "mm, thp: Do not make pmd/pud dirty without a reason" This reverts commit 152e93af3cfe2d29d8136cc0a02a8612507136ee. It was a nice cleanup in theory, but as Nicolai Stange points out, we do need to make the page dirty for the copy-on-write case even when we didn't end up making it writable, since the dirty bit is what we use to check that we've gone through a COW cycle. Reported-by: Michal Hocko Acked-by: Kirill A. Shutemov Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 31 ++++++++++++------------------- mm/internal.h | 3 +-- mm/khugepaged.c | 2 +- mm/memory.c | 2 +- mm/migrate.c | 2 +- 5 files changed, 16 insertions(+), 24 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f22401fd83b5c..0e7ded98d114d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -474,13 +474,10 @@ static int __init setup_transparent_hugepage(char *str) } __setup("transparent_hugepage=", setup_transparent_hugepage); -pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty) +pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { - if (likely(vma->vm_flags & VM_WRITE)) { + if (likely(vma->vm_flags & VM_WRITE)) pmd = pmd_mkwrite(pmd); - if (dirty) - pmd = pmd_mkdirty(pmd); - } return pmd; } @@ -602,7 +599,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } entry = mk_huge_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr, true); mem_cgroup_commit_charge(page, memcg, false, true); lru_cache_add_active_or_unevictable(page, vma); @@ -744,8 +741,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); if (write) { - entry = pmd_mkyoung(entry); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = pmd_mkyoung(pmd_mkdirty(entry)); + entry = maybe_pmd_mkwrite(entry, vma); } if (pgtable) { @@ -791,14 +788,10 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma, - bool dirty) +static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) { - if (likely(vma->vm_flags & VM_WRITE)) { + if (likely(vma->vm_flags & VM_WRITE)) pud = pud_mkwrite(pud); - if (dirty) - pud = pud_mkdirty(pud); - } return pud; } @@ -814,8 +807,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); if (write) { - entry = pud_mkyoung(entry); - entry = maybe_pud_mkwrite(entry, vma, true); + entry = pud_mkyoung(pud_mkdirty(entry)); + entry = maybe_pud_mkwrite(entry, vma); } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); @@ -1286,7 +1279,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) if (reuse_swap_page(page, NULL)) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); ret |= VM_FAULT_WRITE; @@ -1356,7 +1349,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) } else { pmd_t entry; entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd); page_add_new_anon_rmap(new_page, vma, haddr, true); mem_cgroup_commit_charge(new_page, memcg, false, true); @@ -2935,7 +2928,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = pmd_mksoft_dirty(pmde); if (is_write_migration_entry(entry)) - pmde = maybe_pmd_mkwrite(pmde, vma, false); + pmde = maybe_pmd_mkwrite(pmde, vma); flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); page_add_anon_rmap(new, vma, mmun_start, true); diff --git a/mm/internal.h b/mm/internal.h index b35cdebda0cef..e6bd35182daee 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -328,8 +328,7 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) } } -extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, - bool dirty); +extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /* * At what user virtual address is page expected in @vma? diff --git a/mm/khugepaged.c b/mm/khugepaged.c index db43dc8a8ae61..ea4ff259b6719 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1057,7 +1057,7 @@ static void collapse_huge_page(struct mm_struct *mm, pgtable = pmd_pgtable(_pmd); _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); - _pmd = maybe_pmd_mkwrite(_pmd, vma, false); + _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); /* * spin_lock() below is not the equivalent of smp_wmb(), so diff --git a/mm/memory.c b/mm/memory.c index b10c1d26f675b..85e7a87da79fe 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3335,7 +3335,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) entry = mk_huge_pmd(page, vma->vm_page_prot); if (write) - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); page_add_file_rmap(page, true); diff --git a/mm/migrate.c b/mm/migrate.c index 57865fc8cfe33..4d0be47a322a8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2068,7 +2068,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, } entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(entry, vma, false); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); /* * Clear the old entry under pagetable lock and establish the new PTE. From 668533dc0764b30c9dd2baf3ca800156f688326b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Nov 2017 10:30:13 -0800 Subject: [PATCH 325/406] kallsyms: take advantage of the new '%px' format The conditional kallsym hex printing used a special fixed-width '%lx' output (KALLSYM_FMT) in preparation for the hashing of %p, but that series ended up adding a %px specifier to help with the conversions. Use it, and avoid the "print pointer as an unsigned long" code. Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 6 ------ kernel/kallsyms.c | 8 ++++---- kernel/module.c | 6 +++--- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 708f337d780be..bd118a6c60cbf 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,12 +14,6 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) -#ifndef CONFIG_64BIT -# define KALLSYM_FMT "%08lx" -#else -# define KALLSYM_FMT "%016lx" -#endif - struct module; #ifdef CONFIG_KALLSYMS diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 531ffa984bc26..d5fa4116688af 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -614,14 +614,14 @@ static void s_stop(struct seq_file *m, void *p) static int s_show(struct seq_file *m, void *p) { - unsigned long value; + void *value; struct kallsym_iter *iter = m->private; /* Some debugging symbols have no name. Ignore them. */ if (!iter->name[0]) return 0; - value = iter->show_value ? iter->value : 0; + value = iter->show_value ? (void *)iter->value : NULL; if (iter->module_name[0]) { char type; @@ -632,10 +632,10 @@ static int s_show(struct seq_file *m, void *p) */ type = iter->exported ? toupper(iter->type) : tolower(iter->type); - seq_printf(m, KALLSYM_FMT " %c %s\t[%s]\n", value, + seq_printf(m, "%px %c %s\t[%s]\n", value, type, iter->name, iter->module_name); } else - seq_printf(m, KALLSYM_FMT " %c %s\n", value, + seq_printf(m, "%px %c %s\n", value, iter->type, iter->name); return 0; } diff --git a/kernel/module.c b/kernel/module.c index f0411a2717655..dea01ac9cb74c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4157,7 +4157,7 @@ static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; - unsigned long value; + void *value; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) @@ -4173,8 +4173,8 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - value = m->private ? 0 : (unsigned long)mod->core_layout.base; - seq_printf(m, " 0x" KALLSYM_FMT, value); + value = m->private ? NULL : mod->core_layout.base; + seq_printf(m, " 0x%px", value); /* Taints info */ if (mod->taints) From 445f288d706cb5a418b13c340280b47d4585d667 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 18 Nov 2017 13:50:11 -0500 Subject: [PATCH 326/406] NFSv4: Ensure gcc 4.4.4 can compile initialiser for "invalid_stateid" gcc 4.4.4 is too old to have full C11 anonymous union support, so the current initialiser fails to compile. Reported-by: Boris Ostrovsky Signed-off-by: Trond Myklebust (compile-)Tested-by: Boris Ostrovsky Reviewed-by: Geert Uytterhoeven Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 54fd56d715a8c..e4f4a09ed9f49 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -71,8 +71,8 @@ const nfs4_stateid zero_stateid = { }; const nfs4_stateid invalid_stateid = { { - .seqid = cpu_to_be32(0xffffffffU), - .other = { 0 }, + /* Funky initialiser keeps older gcc versions happy */ + .data = { 0xff, 0xff, 0xff, 0xff, 0 }, }, .type = NFS4_INVALID_STATEID_TYPE, }; From 4ba161a793d5f43757c35feff258d9f20a082940 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Nov 2017 12:00:24 -0500 Subject: [PATCH 327/406] SUNRPC: Allow connect to return EHOSTUNREACH Reported-by: Dmitry Vyukov Signed-off-by: Trond Myklebust Tested-by: Dmitry Vyukov Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 684e356b40e4c..c2b2d489b57ba 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2440,6 +2440,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* From ef0010a30935de4e0211cbc7bdffc30446cdee9b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Nov 2017 11:28:09 -0800 Subject: [PATCH 328/406] vsprintf: don't use 'restricted_pointer()' when not restricting Instead, just fall back on the new '%p' behavior which hashes the pointer. Otherwise, '%pK' - that was intended to mark a pointer as restricted - just ends up leaking pointers that a normal '%p' wouldn't leak. Which just make the whole thing pointless. I suspect we should actually get rid of '%pK' entirely, and make it just work as '%p' regardless, but this is the minimal obvious fix. People who actually use 'kptr_restrict' should weigh in on which behavior they want. Cc: Tobin Harding Cc: Kees Cook Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d960aead03368..01c3957b2de62 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1931,6 +1931,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': + if (!kptr_restrict) + break; return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, fmt); From 1569d651f152870663fabd8f1c80af353f967ad5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 27 Nov 2017 13:12:35 +0100 Subject: [PATCH 329/406] drm/ttm: fix populate_and_map() functions once more MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts "drm/ttm: Fix configuration error around populate_and_map() functions". This fix has gone into the wrong direction. Those helpers should be available even when neither CONFIG_INTEL_IOMMU nor CONFIG_SWIOTLB are set. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 -- include/drm/ttm/ttm_page_alloc.h | 32 +++++++++------------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 316f831ad5f04..6d48ccca0b385 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -1058,7 +1058,6 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm) } EXPORT_SYMBOL(ttm_pool_unpopulate); -#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) { unsigned i, j; @@ -1129,7 +1128,6 @@ void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) ttm_pool_unpopulate(&tt->ttm); } EXPORT_SYMBOL(ttm_unmap_and_unpopulate_pages); -#endif int ttm_page_alloc_debugfs(struct seq_file *m, void *data) { diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 38a2b4770c35f..593811362a917 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -58,12 +58,21 @@ int ttm_pool_populate(struct ttm_tt *ttm); */ void ttm_pool_unpopulate(struct ttm_tt *ttm); +/** + * Populates and DMA maps pages to fullfil a ttm_dma_populate() request + */ +int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); + +/** + * Unpopulates and DMA unmaps pages as part of a + * ttm_dma_unpopulate() request */ +void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); + /** * Output the state of pools to debugfs file */ int ttm_page_alloc_debugfs(struct seq_file *m, void *data); - #if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) /** * Initialize pool allocator. @@ -83,17 +92,6 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); - -/** - * Populates and DMA maps pages to fullfil a ttm_dma_populate() request - */ -int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); - -/** - * Unpopulates and DMA unmaps pages as part of a - * ttm_dma_unpopulate() request */ -void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); - #else static inline int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) @@ -116,16 +114,6 @@ static inline void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) { } - -static inline int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) -{ - return -ENOMEM; -} - -static inline void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) -{ -} - #endif #endif From 7fdf165a52505392eb059902b0df55e79a45c25f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 29 Nov 2017 17:26:56 +0100 Subject: [PATCH 330/406] drm/radeon: remove init of CIK VMIDs 8-16 for amdkfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMIDs 8-16 in Kaveri were reserved for use by the amdkfd driver. Because we removed amdkfd support from radeon, those VMIDs are now used by radeon and are initialized by radeon. This patch removes the function that initialized those VMIDs for amdkfd use. This initialization overridden the radeon initialization and caused GPU faults and GUI crashed. Fixes: f4fa88ab28ab ("drm/radeon: deprecate and remove KFD interface") Rported-by: Michel Dänzer Acked-by: Christian König Reviewed-and-Tested-by: Michel Dänzer Signed-off-by: Oded Gabbay Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 898f9a0788304..a6511918f6325 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5451,28 +5451,6 @@ void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) WREG32(VM_INVALIDATE_REQUEST, 0x1); } -static void cik_pcie_init_compute_vmid(struct radeon_device *rdev) -{ - int i; - uint32_t sh_mem_bases, sh_mem_config; - - sh_mem_bases = 0x6000 | 0x6000 << 16; - sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED); - sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED); - - mutex_lock(&rdev->srbm_mutex); - for (i = 8; i < 16; i++) { - cik_srbm_select(rdev, 0, 0, 0, i); - /* CP and shaders */ - WREG32(SH_MEM_CONFIG, sh_mem_config); - WREG32(SH_MEM_APE1_BASE, 1); - WREG32(SH_MEM_APE1_LIMIT, 0); - WREG32(SH_MEM_BASES, sh_mem_bases); - } - cik_srbm_select(rdev, 0, 0, 0, 0); - mutex_unlock(&rdev->srbm_mutex); -} - /** * cik_pcie_gart_enable - gart enable * @@ -5586,8 +5564,6 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) cik_srbm_select(rdev, 0, 0, 0, 0); mutex_unlock(&rdev->srbm_mutex); - cik_pcie_init_compute_vmid(rdev); - cik_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(rdev->mc.gtt_size >> 20), From e5372cd5ef12b05ae74d608f95dc53fe06558867 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 29 Nov 2017 15:09:29 -0500 Subject: [PATCH 331/406] sparc64: Fix boot on T4 and later. If we don't put the NG4fls.o object into the same part of the link as the generic sparc64 objects for fls() and __fls() then the relocation in the branch we use for patching will not fit. Move NG4fls.o into lib-y to fix this problem. Fixes: 46ad8d2d22c1 ("sparc64: Use sparc optimized fls and __fls for T4 and above") Signed-off-by: David S. Miller Reported-by: Anatoly Pugachev Tested-by: Anatoly Pugachev --- arch/sparc/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 0f0f76b4f6cd6..063556fe2cb1d 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -19,7 +19,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += fls64.o -obj-$(CONFIG_SPARC64) += NG4fls.o +lib-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o From 4b81cb2ff69c8a8e297a147d2eb4d9b5e8d7c435 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 29 Nov 2017 16:09:54 -0800 Subject: [PATCH 332/406] mm, memory_hotplug: do not back off draining pcp free pages from kworker context drain_all_pages backs off when called from a kworker context since commit 0ccce3b92421 ("mm, page_alloc: drain per-cpu pages from workqueue context") because the original IPI based pcp draining has been replaced by a WQ based one and the check wanted to prevent from recursion and inter workers dependencies. This has made some sense at the time because the system WQ has been used and one worker holding the lock could be blocked while waiting for new workers to emerge which can be a problem under OOM conditions. Since then commit ce612879ddc7 ("mm: move pcp and lru-pcp draining into single wq") has moved draining to a dedicated (mm_percpu_wq) WQ with a rescuer so we shouldn't depend on any other WQ activity to make a forward progress so calling drain_all_pages from a worker context is safe as long as this doesn't happen from mm_percpu_wq itself which is not the case because all workers are required to _not_ depend on any MM locks. Why is this a problem in the first place? ACPI driven memory hot-remove (acpi_device_hotplug) is executed from the worker context. We end up calling __offline_pages to free all the pages and that requires both lru_add_drain_all_cpuslocked and drain_all_pages to do their job otherwise we can have dangling pages on pcp lists and fail the offline operation (__test_page_isolated_in_pageblock would see a page with 0 ref count but without PageBuddy set). Fix the issue by removing the worker check in drain_all_pages. lru_add_drain_all_cpuslocked doesn't have this restriction so it works as expected. Link: http://lkml.kernel.org/r/20170828093341.26341-1-mhocko@kernel.org Fixes: 0ccce3b924212 ("mm, page_alloc: drain per-cpu pages from workqueue context") Signed-off-by: Michal Hocko Cc: Mel Gorman Cc: Tejun Heo Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d4096f4a5c1f7..145f5181e4cd1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2507,10 +2507,6 @@ void drain_all_pages(struct zone *zone) if (WARN_ON_ONCE(!mm_percpu_wq)) return; - /* Workqueues cannot recurse */ - if (current->flags & PF_WQ_WORKER) - return; - /* * Do not drain if one is already in progress unless it's specific to * a zone. Such callers are primarily CMA and memory hotplug and need From 687cb0884a714ff484d038e9190edc874edcf146 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 29 Nov 2017 16:09:58 -0800 Subject: [PATCH 333/406] mm, oom_reaper: gather each vma to prevent leaking TLB entry tlb_gather_mmu(&tlb, mm, 0, -1) means gathering the whole virtual memory space. In this case, tlb->fullmm is true. Some archs like arm64 doesn't flush TLB when tlb->fullmm is true: commit 5a7862e83000 ("arm64: tlbflush: avoid flushing when fullmm == 1"). Which causes leaking of tlb entries. Will clarifies his patch: "Basically, we tag each address space with an ASID (PCID on x86) which is resident in the TLB. This means we can elide TLB invalidation when pulling down a full mm because we won't ever assign that ASID to another mm without doing TLB invalidation elsewhere (which actually just nukes the whole TLB). I think that means that we could potentially not fault on a kernel uaccess, because we could hit in the TLB" There could be a window between complete_signal() sending IPI to other cores and all threads sharing this mm are really kicked off from cores. In this window, the oom reaper may calls tlb_flush_mmu_tlbonly() to flush TLB then frees pages. However, due to the above problem, the TLB entries are not really flushed on arm64. Other threads are possible to access these pages through TLB entries. Moreover, a copy_to_user() can also write to these pages without generating page fault, causes use-after-free bugs. This patch gathers each vma instead of gathering full vm space. In this case tlb->fullmm is not true. The behavior of oom reaper become similar to munmapping before do_exit, which should be safe for all archs. Link: http://lkml.kernel.org/r/20171107095453.179940-1-wangnan0@huawei.com Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Wang Nan Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Minchan Kim Cc: Will Deacon Cc: Bob Liu Cc: Ingo Molnar Cc: Roman Gushchin Cc: Konstantin Khlebnikov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c86fbd1b590ec..c957be32b27a9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -550,7 +550,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) */ set_bit(MMF_UNSTABLE, &mm->flags); - tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (!can_madv_dontneed_vma(vma)) continue; @@ -565,11 +564,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { + tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end); unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, NULL); + tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); + } } - tlb_finish_mmu(&tlb, 0, -1); pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), From 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 29 Nov 2017 16:10:01 -0800 Subject: [PATCH 334/406] mm/cma: fix alloc_contig_range ret code/potential leak If the call __alloc_contig_migrate_range() in alloc_contig_range returns -EBUSY, processing continues so that test_pages_isolated() is called where there is a tracepoint to identify the busy pages. However, it is possible for busy pages to become available between the calls to these two routines. In this case, the range of pages may be allocated. Unfortunately, the original return code (ret == -EBUSY) is still set and returned to the caller. Therefore, the caller believes the pages were not allocated and they are leaked. Update the comment to indicate that allocation is still possible even if __alloc_contig_migrate_range returns -EBUSY. Also, clear return code in this case so that it is not accidentally used or returned to caller. Link: http://lkml.kernel.org/r/20171122185214.25285-1-mike.kravetz@oracle.com Fixes: 8ef5849fa8a2 ("mm/cma: always check which page caused allocation failure") Signed-off-by: Mike Kravetz Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Joonsoo Kim Cc: Michal Nazarewicz Cc: Laura Abbott Cc: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 145f5181e4cd1..73f5d4556b3d0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7652,11 +7652,18 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* * In case of -EBUSY, we'd like to know which page causes problem. - * So, just fall through. We will check it in test_pages_isolated(). + * So, just fall through. test_pages_isolated() has a tracepoint + * which will report the busy page. + * + * It is possible that busy pages could become available before + * the call to test_pages_isolated, and the range will actually be + * allocated. So, if we fall through be sure to clear ret so that + * -EBUSY is not accidentally used or returned to caller. */ ret = __alloc_contig_migrate_range(&cc, start, end); if (ret && ret != -EBUSY) goto done; + ret =0; /* * Pages from [start, end) are within a MAX_ORDER_NR_PAGES From 1501899a898dfb5477c55534bdfd734c046da06d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:06 -0800 Subject: [PATCH 335/406] mm: fix device-dax pud write-faults triggered by get_user_pages() Currently only get_user_pages_fast() can safely handle the writable gup case due to its use of pud_access_permitted() to check whether the pud entry is writable. In the gup slow path pud_write() is used instead of pud_access_permitted() and to date it has been unimplemented, just calls BUG_ON(). kernel BUG at ./include/linux/hugetlb.h:244! [..] RIP: 0010:follow_devmap_pud+0x482/0x490 [..] Call Trace: follow_page_mask+0x28c/0x6e0 __get_user_pages+0xe4/0x6c0 get_user_pages_unlocked+0x130/0x1b0 get_user_pages_fast+0x89/0xb0 iov_iter_get_pages_alloc+0x114/0x4a0 nfs_direct_read_schedule_iovec+0xd2/0x350 ? nfs_start_io_direct+0x63/0x70 nfs_file_direct_read+0x1e0/0x250 nfs_file_read+0x90/0xc0 For now this just implements a simple check for the _PAGE_RW bit similar to pmd_write. However, this implies that the gup-slow-path check is missing the extra checks that the gup-fast-path performs with pud_access_permitted. Later patches will align all checks to use the 'access_permitted' helper if the architecture provides it. Note that the generic 'access_permitted' helper fallback is the simple _PAGE_RW check on architectures that do not define the 'access_permitted' helper(s). [dan.j.williams@intel.com: fix powerpc compile error] Link: http://lkml.kernel.org/r/151129126165.37405.16031785266675461397.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151043109938.2842.14834662818213616199.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Dan Williams Reported-by: Stephen Rothwell Acked-by: Thomas Gleixner [x86] Cc: Kirill A. Shutemov Cc: Catalin Marinas Cc: "David S. Miller" Cc: Dave Hansen Cc: Will Deacon Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 6 ++++++ include/asm-generic/pgtable.h | 8 ++++++++ include/linux/hugetlb.h | 8 -------- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 09f9e1e00e3bd..dcce76ee4aa7b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; +} + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 757dc6ffc7ba5..1ac457511f4eb 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -814,6 +814,14 @@ static inline int pmd_write(pmd_t pmd) #endif /* __HAVE_ARCH_PMD_WRITE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifndef pud_write +static inline int pud_write(pud_t pud) +{ + BUG(); + return 0; +} +#endif /* pud_write */ + #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fbf5b31d47eea..82a25880714ac 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd) } #endif -#ifndef pud_write -static inline int pud_write(pud_t pud) -{ - BUG(); - return 0; -} -#endif - #define HUGETLB_ANON_FILE "anon_hugepage" enum { From e4e40e0263ea6a3bfefbfd15d1b6ff5c03f2b95e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:10 -0800 Subject: [PATCH 336/406] mm: switch to 'define pmd_write' instead of __HAVE_ARCH_PMD_WRITE In response to compile breakage introduced by a series that added the pud_write helper to x86, Stephen notes: did you consider using the other paradigm: In arch include files: #define pud_write pud_write static inline int pud_write(pud_t pud) ..... Then in include/asm-generic/pgtable.h: #ifndef pud_write tatic inline int pud_write(pud_t pud) { .... } #endif If you had, then the powerpc code would have worked ... ;-) and many of the other interfaces in include/asm-generic/pgtable.h are protected that way ... Given that some architecture already define pmd_write() as a macro, it's a net reduction to drop the definition of __HAVE_ARCH_PMD_WRITE. Link: http://lkml.kernel.org/r/151129126721.37405.13339850900081557813.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Suggested-by: Stephen Rothwell Cc: Benjamin Herrenschmidt Cc: "Aneesh Kumar K.V" Cc: Oliver OHalloran Cc: Chris Metcalf Cc: Russell King Cc: Ralf Baechle Cc: "H. Peter Anvin" Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgtable-3level.h | 1 - arch/arm64/include/asm/pgtable.h | 1 - arch/mips/include/asm/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 - arch/s390/include/asm/pgtable.h | 2 +- arch/sparc/include/asm/pgtable_64.h | 2 +- arch/tile/include/asm/pgtable.h | 1 - arch/x86/include/asm/pgtable.h | 2 +- include/asm-generic/pgtable.h | 4 ++-- 9 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 2a029bceaf2f8..1a7a17b2a1bae 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte) } #define __HAVE_ARCH_PTE_SPECIAL -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pud_page(pud) pmd_page(__pmd(pud_val(pud))) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9530b5b5ca83..149d05fb94215 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 9e9e94415d08f..1a508a74d48d3 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_WRITE); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9a677cd5997f9..44697817ccc6d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd) } #endif /* CONFIG_NUMA_BALANCING */ -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d7fe9838084d3..0a6b0286c32e9 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 5a9e96be16652..9937c5ff94a9f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return pte_pfn(pte); } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 2a26cc4fefc27..adfa21b18488f 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd)) #define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) -#define __HAVE_ARCH_PMD_WRITE #define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot))) #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index dcce76ee4aa7b..95e2dfd755218 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_RW; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 1ac457511f4eb..b234d54f2cb6e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -805,13 +805,13 @@ static inline int pmd_trans_huge(pmd_t pmd) { return 0; } -#ifndef __HAVE_ARCH_PMD_WRITE +#ifndef pmd_write static inline int pmd_write(pmd_t pmd) { BUG(); return 0; } -#endif /* __HAVE_ARCH_PMD_WRITE */ +#endif /* pmd_write */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifndef pud_write From e7fe7b5cae90cf85bb6fed5ec5d4c5cf311a4fe9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:14 -0800 Subject: [PATCH 337/406] mm: replace pud_write with pud_access_permitted in fault + gup paths The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pud_write is must be referencing user-memory. [dan.j.williams@intel.com: fix powerpc compile error] Link: http://lkml.kernel.org/r/151129127237.37405.16073414520854722485.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151043110453.2842.2166049702068628177.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: "David S. Miller" Cc: Kirill A. Shutemov Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 6 ++++++ arch/sparc/mm/gup.c | 2 +- mm/huge_memory.c | 2 +- mm/memory.c | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0a6b0286c32e9..57d7bc92e0b8a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1264,6 +1264,12 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline pud_t pud_mkclean(pud_t pud) { if (pud_large(pud)) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 5335ba3c850ed..5ae2d0a01a706 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, if (!(pud_val(pud) & _PAGE_VALID)) return 0; - if (write && !pud_write(pud)) + if (!pud_access_permitted(pud, write)) return 0; refs = 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f22401fd83b5c..4fe2491056a0c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1019,7 +1019,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pud_lockptr(mm, pud)); - if (flags & FOLL_WRITE && !pud_write(*pud)) + if (!pud_access_permitted(*pud, flags & FOLL_WRITE)) return NULL; if (pud_present(*pud) && pud_devmap(*pud)) diff --git a/mm/memory.c b/mm/memory.c index b10c1d26f675b..25d283d46ea32 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4013,7 +4013,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* NUMA case for anonymous PUDs would go here */ - if (dirty && !pud_write(orig_pud)) { + if (dirty && !pud_access_permitted(orig_pud, WRITE)) { ret = wp_huge_pud(&vmf, orig_pud); if (!(ret & VM_FAULT_FALLBACK)) return ret; From c7da82b894e9eef60a04a15f065a8502341bf13b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:18 -0800 Subject: [PATCH 338/406] mm: replace pmd_write with pmd_access_permitted in fault + gup paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pmd_write is must be referencing user-memory. Link: http://lkml.kernel.org/r/151043111049.2842.15241454964150083466.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: "Jérôme Glisse" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/mm/gup.c | 2 +- fs/dax.c | 3 ++- mm/hmm.c | 4 ++-- mm/huge_memory.c | 4 ++-- mm/memory.c | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 5ae2d0a01a706..33c0f8bb0f33d 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, if (!(pmd_val(pmd) & _PAGE_VALID)) return 0; - if (write && !pmd_write(pmd)) + if (!pmd_access_permitted(pmd, write)) return 0; refs = 0; diff --git a/fs/dax.c b/fs/dax.c index 95981591977a0..78b72c48374e5 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, if (pfn != pmd_pfn(*pmdp)) goto unlock_pmd; - if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp)) + if (!pmd_dirty(*pmdp) + && !pmd_access_permitted(*pmdp, WRITE)) goto unlock_pmd; flush_cache_page(vma, address, pfn); diff --git a/mm/hmm.c b/mm/hmm.c index ea19742a5d60b..93718a3916119 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -391,11 +391,11 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, if (pmd_protnone(pmd)) return hmm_vma_walk_clear(start, end, walk); - if (write_fault && !pmd_write(pmd)) + if (!pmd_access_permitted(pmd, write_fault)) return hmm_vma_walk_clear(start, end, walk); pfn = pmd_pfn(pmd) + pte_index(addr); - flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0; + flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0; for (; addr < end; addr += PAGE_SIZE, i++, pfn++) pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag; return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4fe2491056a0c..05b729f45e8a3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -877,7 +877,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, */ WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); - if (flags & FOLL_WRITE && !pmd_write(*pmd)) + if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE)) return NULL; if (pmd_present(*pmd) && pmd_devmap(*pmd)) @@ -1393,7 +1393,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) */ static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) { - return pmd_write(pmd) || + return pmd_access_permitted(pmd, WRITE) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); } diff --git a/mm/memory.c b/mm/memory.c index 25d283d46ea32..416e451a707e2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4046,7 +4046,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf, orig_pmd); - if (dirty && !pmd_write(orig_pmd)) { + if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) { ret = wp_huge_pmd(&vmf, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; From 5c9d2d5c269c498aa9a546e8d2158a3e4142a1a2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:21 -0800 Subject: [PATCH 339/406] mm: replace pte_write with pte_access_permitted in fault + gup paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pte_write is must be referencing user-memory. Link: http://lkml.kernel.org/r/151043111604.2842.8051684481794973100.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: "Jérôme Glisse" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 2 +- mm/hmm.c | 4 ++-- mm/memory.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index dfcde13f289a7..85cc822fd403c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, */ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) { - return pte_write(pte) || + return pte_access_permitted(pte, WRITE) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); } diff --git a/mm/hmm.c b/mm/hmm.c index 93718a3916119..3a5c172af5603 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -456,11 +456,11 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, continue; } - if (write_fault && !pte_write(pte)) + if (!pte_access_permitted(pte, write_fault)) goto fault; pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag; - pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0; + pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0; continue; fault: diff --git a/mm/memory.c b/mm/memory.c index 416e451a707e2..4f07acd1695fb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3948,7 +3948,7 @@ static int handle_pte_fault(struct vm_fault *vmf) if (unlikely(!pte_same(*vmf->pte, entry))) goto unlock; if (vmf->flags & FAULT_FLAG_WRITE) { - if (!pte_write(entry)) + if (!pte_access_permitted(entry, WRITE)) return do_wp_page(vmf); entry = pte_mkdirty(entry); } @@ -4336,7 +4336,7 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = *ptep; - if ((flags & FOLL_WRITE) && !pte_write(pte)) + if (!pte_access_permitted(pte, flags & FOLL_WRITE)) goto unlock; *prot = pgprot_val(pte_pgprot(pte)); From 95a87982541932503d3f59aba4c30b0bde0a6294 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Wed, 29 Nov 2017 16:10:25 -0800 Subject: [PATCH 340/406] scripts/faddr2line: extend usage on generic arch When cross-compiling, fadd2line should use the binary tool used for the target system, rather than that of the host. Link: http://lkml.kernel.org/r/20171121092911.GA150711@sofia Signed-off-by: Liu Changcheng Cc: Kate Stewart Cc: NeilBrown Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/faddr2line | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 1f5ce959f5965..39e07d8574dd7 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,9 +44,16 @@ set -o errexit set -o nounset +READELF="${CROSS_COMPILE}readelf" +ADDR2LINE="${CROSS_COMPILE}addr2line" +SIZE="${CROSS_COMPILE}size" +NM="${CROSS_COMPILE}nm" + command -v awk >/dev/null 2>&1 || die "awk isn't installed" -command -v readelf >/dev/null 2>&1 || die "readelf isn't installed" -command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" +command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed" +command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed" usage() { echo "usage: faddr2line ..." >&2 @@ -69,10 +76,10 @@ die() { find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return - local file_line=$(addr2line -e $objfile $start_kernel_addr) + local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) [[ -z $file_line ]] && return local prefix=${file_line%init/main.c:*} @@ -104,7 +111,7 @@ __faddr2line() { # Go through each of the object's symbols which match the func name. # In rare cases there might be duplicates. - file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}') + file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}') while read symbol; do local fields=($symbol) local sym_base=0x${fields[0]} @@ -156,10 +163,10 @@ __faddr2line() { # pass real address to addr2line echo "$func+$offset/$sym_size:" - addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" + ${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 - done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') + done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') } [[ $# -lt 2 ]] && usage From 31383c6865a578834dd953d9dbc88e6b19fe3997 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:28 -0800 Subject: [PATCH 341/406] mm, hugetlbfs: introduce ->split() to vm_operations_struct Patch series "device-dax: fix unaligned munmap handling" When device-dax is operating in huge-page mode we want it to behave like hugetlbfs and fail attempts to split vmas into unaligned ranges. It would be messy to teach the munmap path about device-dax alignment constraints in the same (hstate) way that hugetlbfs communicates this constraint. Instead, these patches introduce a new ->split() vm operation. This patch (of 2): The device-dax interface has similar constraints as hugetlbfs in that it requires the munmap path to unmap in huge page aligned units. Rather than add more custom vma handling code in __split_vma() introduce a new vm operation to perform this vma specific check. Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Cc: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/hugetlb.c | 8 ++++++++ mm/mmap.c | 8 +++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ee073146aaa7c..b3b6a7e313e94 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -377,6 +377,7 @@ enum page_entry_size { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); + int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_fault *vmf); int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 681b300185c0c..698e8fb340318 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3125,6 +3125,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) } } +static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) +{ + if (addr & ~(huge_page_mask(hstate_vma(vma)))) + return -EINVAL; + return 0; +} + /* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the @@ -3141,6 +3148,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, .close = hugetlb_vm_op_close, + .split = hugetlb_vm_op_split, }; static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, diff --git a/mm/mmap.c b/mm/mmap.c index 924839fac0e64..a4d5468212149 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2555,9 +2555,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *new; int err; - if (is_vm_hugetlb_page(vma) && (addr & - ~(huge_page_mask(hstate_vma(vma))))) - return -EINVAL; + if (vma->vm_ops && vma->vm_ops->split) { + err = vma->vm_ops->split(vma, addr); + if (err) + return err; + } new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) From 9702cffdbf2129516db679e4467db81e1cd287da Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:32 -0800 Subject: [PATCH 342/406] device-dax: implement ->split() to catch invalid munmap attempts Similar to how device-dax enforces that the 'address', 'offset', and 'len' parameters to mmap() be aligned to the device's fundamental alignment, the same constraints apply to munmap(). Implement ->split() to fail munmap calls that violate the alignment constraint. Otherwise, we later fail VM_BUG_ON checks in the unmap_page_range() path with crash signatures of the form: vma ffff8800b60c8a88 start 00007f88c0000000 end 00007f88c0e00000 next (null) prev (null) mm ffff8800b61150c0 prot 8000000000000027 anon_vma (null) vm_ops ffffffffa0091240 pgoff 0 file ffff8800b638ef80 private_data (null) flags: 0x380000fb(read|write|shared|mayread|maywrite|mayexec|mayshare|softdirty|mixedmap|hugepage) ------------[ cut here ]------------ kernel BUG at mm/huge_memory.c:2014! [..] RIP: 0010:__split_huge_pud+0x12a/0x180 [..] Call Trace: unmap_page_range+0x245/0xa40 ? __vma_adjust+0x301/0x990 unmap_vmas+0x4c/0xa0 unmap_region+0xae/0x120 ? __vma_rb_erase+0x11a/0x230 do_munmap+0x276/0x410 vm_munmap+0x6a/0xa0 SyS_munmap+0x1d/0x30 Link: http://lkml.kernel.org/r/151130418681.4029.7118245855057952010.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Reported-by: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dax/device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 6833ada237ab7..7b0bf825c4e73 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -428,9 +428,21 @@ static int dev_dax_fault(struct vm_fault *vmf) return dev_dax_huge_fault(vmf, PE_SIZE_PTE); } +static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr) +{ + struct file *filp = vma->vm_file; + struct dev_dax *dev_dax = filp->private_data; + struct dax_region *dax_region = dev_dax->region; + + if (!IS_ALIGNED(addr, dax_region->align)) + return -EINVAL; + return 0; +} + static const struct vm_operations_struct dax_vm_ops = { .fault = dev_dax_fault, .huge_fault = dev_dax_huge_fault, + .split = dev_dax_split, }; static int dax_mmap(struct file *filp, struct vm_area_struct *vma) From 2bb6d2837083de722bfdc369cb0d76ce188dd9b4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:35 -0800 Subject: [PATCH 343/406] mm: introduce get_user_pages_longterm Patch series "introduce get_user_pages_longterm()", v2. Here is a new get_user_pages api for cases where a driver intends to keep an elevated page count indefinitely. This is distinct from usages like iov_iter_get_pages where the elevated page counts are transient. The iov_iter_get_pages cases immediately turn around and submit the pages to a device driver which will put_page when the i/o operation completes (under kernel control). In the longterm case userspace is responsible for dropping the page reference at some undefined point in the future. This is untenable for filesystem-dax case where the filesystem is in control of the lifetime of the block / page and needs reasonable limits on how long it can wait for pages in a mapping to become idle. Fixing filesystems to actually wait for dax pages to be idle before blocks from a truncate/hole-punch operation are repurposed is saved for a later patch series. Also, allowing longterm registration of dax mappings is a future patch series that introduces a "map with lease" semantic where the kernel can revoke a lease and force userspace to drop its page references. I have also tagged these for -stable to purposely break cases that might assume that longterm memory registrations for filesystem-dax mappings were supported by the kernel. The behavior regression this policy change implies is one of the reasons we maintain the "dax enabled. Warning: EXPERIMENTAL, use at your own risk" notification when mounting a filesystem in dax mode. It is worth noting the device-dax interface does not suffer the same constraints since it does not support file space management operations like hole-punch. This patch (of 4): Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow long standing memory registrations against filesytem-dax vmas. Device-dax vmas do not have this problem and are explicitly allowed. This is temporary until a "memory registration with layout-lease" mechanism can be implemented for the affected sub-systems (RDMA and V4L2). [akpm@linux-foundation.org: use kcalloc()] Link: http://lkml.kernel.org/r/151068939435.7446.13560129395419350737.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Suggested-by: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Inki Dae Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Ross Zwisler Cc: Sean Hefty Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 14 ++++++++++ include/linux/mm.h | 13 ++++++++++ mm/gup.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index bbd92da0946e1..9dc498d16cc14 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3194,6 +3194,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma) return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } +static inline bool vma_is_fsdax(struct vm_area_struct *vma) +{ + struct inode *inode; + + if (!vma->vm_file) + return false; + if (!vma_is_dax(vma)) + return false; + inode = file_inode(vma->vm_file); + if (inode->i_mode == S_IFCHR) + return false; /* device-dax */ + return true; +} + static inline int iocb_flags(struct file *file) { int res = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index b3b6a7e313e94..ea818ff739cdf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1380,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); +#ifdef CONFIG_FS_DAX +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas); +#else +static inline long get_user_pages_longterm(unsigned long start, + unsigned long nr_pages, unsigned int gup_flags, + struct page **pages, struct vm_area_struct **vmas) +{ + return get_user_pages(start, nr_pages, gup_flags, pages, vmas); +} +#endif /* CONFIG_FS_DAX */ + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); diff --git a/mm/gup.c b/mm/gup.c index 85cc822fd403c..d3fb60e5bfacd 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, } EXPORT_SYMBOL(get_user_pages); +#ifdef CONFIG_FS_DAX +/* + * This is the same as get_user_pages() in that it assumes we are + * operating on the current task's mm, but it goes further to validate + * that the vmas associated with the address range are suitable for + * longterm elevated page reference counts. For example, filesystem-dax + * mappings are subject to the lifetime enforced by the filesystem and + * we need guarantees that longterm users like RDMA and V4L2 only + * establish mappings that have a kernel enforced revocation mechanism. + * + * "longterm" == userspace controlled elevated page count lifetime. + * Contrast this to iov_iter_get_pages() usages which are transient. + */ +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas_arg) +{ + struct vm_area_struct **vmas = vmas_arg; + struct vm_area_struct *vma_prev = NULL; + long rc, i; + + if (!pages) + return -EINVAL; + + if (!vmas) { + vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!vmas) + return -ENOMEM; + } + + rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas); + + for (i = 0; i < rc; i++) { + struct vm_area_struct *vma = vmas[i]; + + if (vma == vma_prev) + continue; + + vma_prev = vma; + + if (vma_is_fsdax(vma)) + break; + } + + /* + * Either get_user_pages() failed, or the vma validation + * succeeded, in either case we don't need to put_page() before + * returning. + */ + if (i >= rc) + goto out; + + for (i = 0; i < rc; i++) + put_page(pages[i]); + rc = -EOPNOTSUPP; +out: + if (vmas != vmas_arg) + kfree(vmas); + return rc; +} +EXPORT_SYMBOL(get_user_pages_longterm); +#endif /* CONFIG_FS_DAX */ + /** * populate_vma_page_range() - populate a range of pages in the vma. * @vma: target vma From b7f0554a56f21fb3e636a627450a9add030889be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:39 -0800 Subject: [PATCH 344/406] mm: fail get_vaddr_frames() for filesystem-dax mappings Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow V4L2, Exynos, and other frame vector users to create long standing / irrevocable memory registrations against filesytem-dax vmas. [dan.j.williams@intel.com: add comment for vma_is_fsdax() check in get_vaddr_frames(), per Jan] Link: http://lkml.kernel.org/r/151197874035.26211.4061781453123083667.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151068939985.7446.15684639617389154187.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reviewed-by: Jan Kara Cc: Inki Dae Cc: Seung-Woo Kim Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Vlastimil Babka Cc: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Ross Zwisler Cc: Sean Hefty Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/frame_vector.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 2f98df0d460ee..297c7238f7d40 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -53,6 +53,18 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = -EFAULT; goto out; } + + /* + * While get_vaddr_frames() could be used for transient (kernel + * controlled lifetime) pinning of memory pages all current + * users establish long term (userspace controlled lifetime) + * page pinning. Treat get_vaddr_frames() like + * get_user_pages_longterm() and disallow it for filesystem-dax + * mappings. + */ + if (vma_is_fsdax(vma)) + return -EOPNOTSUPP; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) { vec->got_ref = true; vec->is_pfns = false; From b70131de648c2b997d22f4653934438013f407a1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:43 -0800 Subject: [PATCH 345/406] v4l2: disable filesystem-dax mapping support V4L2 memory registrations are incompatible with filesystem-dax that needs the ability to revoke dma access to a mapping at will, or otherwise allow the kernel to wait for completion of DMA. The filesystem-dax implementation breaks the traditional solution of truncate of active file backed mappings since there is no page-cache page we can orphan to sustain ongoing DMA. If v4l2 wants to support long lived DMA mappings it needs to arrange to hold a file lease or use some other mechanism so that the kernel can coordinate revoking DMA access when the filesystem needs to truncate mappings. Link: http://lkml.kernel.org/r/151068940499.7446.12846708245365671207.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reported-by: Jan Kara Reviewed-by: Jan Kara Cc: Mauro Carvalho Chehab Cc: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Inki Dae Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mel Gorman Cc: Ross Zwisler Cc: Sean Hefty Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/v4l2-core/videobuf-dma-sg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 0b5c43f7e020d..f412429cf5ba5 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -185,12 +185,13 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n", data, size, dma->nr_pages); - err = get_user_pages(data & PAGE_MASK, dma->nr_pages, + err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages, flags, dma->pages, NULL); if (err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; - dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages); + dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err, + dma->nr_pages); return err < 0 ? err : -EINVAL; } return 0; From 5f1d43de54164dcfb9bfa542fcc92c1e1a1b6c1d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:47 -0800 Subject: [PATCH 346/406] IB/core: disable memory registration of filesystem-dax vmas Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow RDMA to create long standing memory registrations against filesytem-dax vmas. Link: http://lkml.kernel.org/r/151068941011.7446.7766030590347262502.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reported-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Jason Gunthorpe Acked-by: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Jeff Moyer Cc: Ross Zwisler Cc: Inki Dae Cc: Jan Kara Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 21e60b1e2ff41..130606c3b07c1 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -191,7 +191,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, sg_list_start = umem->sg_head.sgl; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); From 04e35f4495dd560db30c25efca4eecae8ec8c375 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 29 Nov 2017 16:10:51 -0800 Subject: [PATCH 347/406] exec: avoid RLIMIT_STACK races with prlimit() While the defense-in-depth RLIMIT_STACK limit on setuid processes was protected against races from other threads calling setrlimit(), I missed protecting it against races from external processes calling prlimit(). This adds locking around the change and makes sure that rlim_max is set too. Link: http://lkml.kernel.org/r/20171127193457.GA11348@beast Fixes: 64701dee4178e ("exec: Use sane stack rlimit under secureexec") Signed-off-by: Kees Cook Reported-by: Ben Hutchings Reported-by: Brad Spengler Acked-by: Serge Hallyn Cc: James Morris Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: Jiri Slaby Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 1d6243d9f2b65..6be2aa0ab26fe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm) * avoid bad behavior from the prior rlimits. This has to * happen before arch_pick_mmap_layout(), which examines * RLIMIT_STACK, but after the point of no return to avoid - * needing to clean up the change on failure. + * races from other threads changing the limits. This also + * must be protected from races with prlimit() calls. */ + task_lock(current->group_leader); if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; + if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM) + current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM; + task_unlock(current->group_leader); } arch_pick_mmap_layout(current->mm); From 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 Mon Sep 17 00:00:00 2001 From: chenjie Date: Wed, 29 Nov 2017 16:10:54 -0800 Subject: [PATCH 348/406] mm/madvise.c: fix madvise() infinite loop under special circumstances MADVISE_WILLNEED has always been a noop for DAX (formerly XIP) mappings. Unfortunately madvise_willneed() doesn't communicate this information properly to the generic madvise syscall implementation. The calling convention is quite subtle there. madvise_vma() is supposed to either return an error or update &prev otherwise the main loop will never advance to the next vma and it will keep looping for ever without a way to get out of the kernel. It seems this has been broken since introduction. Nobody has noticed because nobody seems to be using MADVISE_WILLNEED on these DAX mappings. [mhocko@suse.com: rewrite changelog] Link: http://lkml.kernel.org/r/20171127115318.911-1-guoxuenan@huawei.com Fixes: fe77ba6f4f97 ("[PATCH] xip: madvice/fadvice: execute in place") Signed-off-by: chenjie Signed-off-by: guoxuenan Acked-by: Michal Hocko Cc: Minchan Kim Cc: zhangyi (F) Cc: Miao Xie Cc: Mike Rapoport Cc: Shaohua Li Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: David Rientjes Cc: Anshuman Khandual Cc: Rik van Riel Cc: Carsten Otte Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 375cf32087e4a..751e97aa22106 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_area_struct *vma, { struct file *file = vma->vm_file; + *prev = vma; #ifdef CONFIG_SWAP if (!file) { - *prev = vma; force_swapin_readahead(vma, start, end); return 0; } if (shmem_mapping(file->f_mapping)) { - *prev = vma; force_shm_swapin_readahead(vma, start, end, file->f_mapping); return 0; @@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } - *prev = vma; start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; From 90daf3062fc0f8f919d5496fe167bbd6016a6a63 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 29 Nov 2017 16:10:58 -0800 Subject: [PATCH 349/406] Revert "mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical" This reverts commit 0f6d24f87856 ("mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical") because it causes false positive warnings during OOM situations as noticed by Tetsuo Handa: Node 0 active_anon:3525940kB inactive_anon:8372kB active_file:216kB inactive_file:1872kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:2504kB dirty:52kB writeback:0kB shmem:8660kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 636928kB writeback_tmp:0kB unstable:0kB all_unreclaimable? yes Node 0 DMA free:14848kB min:284kB low:352kB high:420kB active_anon:992kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15988kB managed:15904kB mlocked:0kB kernel_stack:0kB pagetables:24kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 2687 3645 3645 Node 0 DMA32 free:53004kB min:49608kB low:62008kB high:74408kB active_anon:2712648kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:3129216kB managed:2773132kB mlocked:0kB kernel_stack:96kB pagetables:5096kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 0 958 958 Node 0 Normal free:17140kB min:17684kB low:22104kB high:26524kB active_anon:812300kB inactive_anon:8372kB active_file:1228kB inactive_file:1868kB unevictable:0kB writepending:52kB present:1048576kB managed:981224kB mlocked:0kB kernel_stack:3520kB pagetables:8552kB bounce:0kB free_pcp:120kB local_pcp:120kB free_cma:0kB lowmem_reserve[]: 0 0 0 0 [...] Out of memory: Kill process 8459 (a.out) score 999 or sacrifice child Killed process 8459 (a.out) total-vm:4180kB, anon-rss:88kB, file-rss:0kB, shmem-rss:0kB oom_reaper: reaped process 8459 (a.out), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB vm direct limit must be set greater than background limit. The problem is that both thresh and bg_thresh will be 0 if available_memory is less than 4 pages when evaluating global_dirtyable_memory. While this might be worked around the whole point of the warning is dubious at best. We do rely on admins to do sensible things when changing tunable knobs. Dirty memory writeback knobs are not any special in that regards so revert the warning rather than adding more hacks to work this around. Debugged by Yafang Shao. Link: http://lkml.kernel.org/r/20171127091939.tahb77nznytcxw55@dhcp22.suse.cz Fixes: 0f6d24f87856 ("mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical") Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Cc: Yafang Shao Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 7 ------- mm/page-writeback.c | 5 +---- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b920423f88cbc..5025ff9307e66 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -158,10 +158,6 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any value lower than this limit will be ignored and the old configuration will be retained. -Note: the value of dirty_bytes also must be set greater than -dirty_background_bytes or the amount of memory corresponding to -dirty_background_ratio. - ============================================================== dirty_expire_centisecs @@ -181,9 +177,6 @@ generating disk writes will itself start writing out dirty data. The total available memory is not equal to total system memory. -Note: dirty_ratio must be set greater than dirty_background_ratio or -ratio corresponding to dirty_background_bytes. - ============================================================== dirty_writeback_centisecs diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e7095030aa1fa..586f31261c832 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -433,11 +433,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) else bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; - if (unlikely(bg_thresh >= thresh)) { - pr_warn("vm direct limit must be set greater than background limit.\n"); + if (bg_thresh >= thresh) bg_thresh = thresh / 2; - } - tsk = current; if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; From d5dabd633922ac5ee5bcc67748f7defb8b211469 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 29 Nov 2017 16:11:01 -0800 Subject: [PATCH 350/406] fs/mbcache.c: make count_objects() more robust When running ltp stress test for 7*24 hours, vmscan occasionally emits the following warning continuously: mb_cache_scan+0x0/0x3f0 negative objects to delete nr=-9232265467809300450 ... Tracing shows the freeable(mb_cache_count returns) is -1, which causes the continuous accumulation and overflow of total_scan. This patch makes sure that mb_cache_count() cannot return a negative value, which makes the mbcache shrinker more robust. Link: http://lkml.kernel.org/r/1511753419-52328-1-git-send-email-jiang.biao2@zte.com.cn Signed-off-by: Jiang Biao Cc: Al Viro Cc: Minchan Kim Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/mbcache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/mbcache.c b/fs/mbcache.c index d818fd2367870..b8b8b9ced9f81 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink, struct mb_cache *cache = container_of(shrink, struct mb_cache, c_shrink); + /* Unlikely, but not impossible */ + if (unlikely(cache->c_entry_count < 0)) + return 0; return cache->c_entry_count; } From edbddb83a15b4361d8c3bf00aabee85fd3ef4d80 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Nov 2017 16:11:05 -0800 Subject: [PATCH 351/406] scripts/bloat-o-meter: don't fail with division by 0 Under some circumstances it's possible to get a divider 0 which crashes the script. Traceback (most recent call last): File "linux/scripts/bloat-o-meter", line 98, in print_result("Function", "tTdDbBrR", 2) File "linux/scripts/bloat-o-meter", line 87, in print_result (otot, ntot, (ntot - otot)*100.0/otot)) ZeroDivisionError: float division by zero Hide this by checking the divider first. Link: http://lkml.kernel.org/r/20171123171219.31453-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Alexey Dobriyan Cc: Vaneet Narang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/bloat-o-meter | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 6f099f915dcfe..94b664817ad91 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -83,8 +83,11 @@ def print_result(symboltype, symbolformat, argc): for d, n in delta: if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) - print("Total: Before=%d, After=%d, chg %+.2f%%" % \ - (otot, ntot, (ntot - otot)*100.0/otot)) + if otot: + percent = (ntot - otot) * 100.0 / otot + else: + percent = 0 + print("Total: Before=%d, After=%d, chg %+.2f%%" % (otot, ntot, percent)) if sys.argv[1] == "-c": print_result("Function", "tT", 3) From bde5f6bc68db51128f875a756e9082a6c6ff7b4c Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 29 Nov 2017 16:11:08 -0800 Subject: [PATCH 352/406] kmemleak: add scheduling point to kmemleak_scan() kmemleak_scan() will scan struct page for each node and it can be really large and resulting in a soft lockup. We have seen a soft lockup when do scan while compile kernel: watchdog: BUG: soft lockup - CPU#53 stuck for 22s! [bash:10287] [...] Call Trace: kmemleak_scan+0x21a/0x4c0 kmemleak_write+0x312/0x350 full_proxy_write+0x5a/0xa0 __vfs_write+0x33/0x150 vfs_write+0xad/0x1a0 SyS_write+0x52/0xc0 do_syscall_64+0x61/0x1a0 entry_SYSCALL64_slow_path+0x25/0x25 Fix this by adding cond_resched every MAX_SCAN_SIZE. Link: http://lkml.kernel.org/r/1511439788-20099-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Suggested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e4738d5e9b8c5..3d4781756d50f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1523,6 +1523,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); From 40a899ed16486455f964e46d1af31fd4fded21c1 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 29 Nov 2017 16:11:12 -0800 Subject: [PATCH 353/406] mm: migrate: fix an incorrect call of prep_transhuge_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In https://lkml.org/lkml/2017/11/20/411, Andrea reported that during memory hotplug/hot remove prep_transhuge_page() is called incorrectly on non-THP pages for migration, when THP is on but THP migration is not enabled. This leads to a bad state of target pages for migration. By inspecting the code, if called on a non-THP, prep_transhuge_page() will 1) change the value of the mapping of (page + 2), since it is used for THP deferred list; 2) change the lru value of (page + 1), since it is used for THP's dtor. Both can lead to data corruption of these two pages. Andrea said: "Pragmatically and from the point of view of the memory_hotplug subsys, the effect is a kernel crash when pages are being migrated during a memory hot remove offline and migration target pages are found in a bad state" This patch fixes it by only calling prep_transhuge_page() when we are certain that the target page is THP. Link: http://lkml.kernel.org/r/20171121021855.50525-1-zi.yan@sent.com Fixes: 8135d8926c08 ("mm: memory_hotplug: memory hotremove supports thp migration") Signed-off-by: Zi Yan Reported-by: Andrea Reale Cc: Naoya Horiguchi Cc: Michal Hocko Cc: "Jérôme Glisse" Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 895ec0c4942e6..a2246cf670bad 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page, new_page = __alloc_pages_nodemask(gfp_mask, order, preferred_nid, nodemask); - if (new_page && PageTransHuge(page)) + if (new_page && PageTransHuge(new_page)) prep_transhuge_page(new_page); return new_page; From d08afa149acfd00871484ada6dabc3880524cd1c Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 29 Nov 2017 16:11:15 -0800 Subject: [PATCH 354/406] mm, memcg: fix mem_cgroup_swapout() for THPs Commit d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP") changed mem_cgroup_swapout() to support transparent huge page (THP). However the patch missed one location which should be changed for correctly handling THPs. The resulting bug will cause the memory cgroups whose THPs were swapped out to become zombies on deletion. Link: http://lkml.kernel.org/r/20171128161941.20931-1-shakeelb@google.com Fixes: d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP") Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Huang Ying Cc: Vladimir Davydov Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 50e6906314f8d..ac2ffd5e02b91 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) memcg_check_events(memcg, page); if (!mem_cgroup_is_root(memcg)) - css_put(&memcg->css); + css_put_many(&memcg->css, nr_entries); } /** From b6e8e12c0aeb5fbf1bf46c84d58cc93aedede385 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Wed, 29 Nov 2017 16:11:19 -0800 Subject: [PATCH 355/406] fs/fat/inode.c: fix sb_rdonly() change Commit bc98a42c1f7d ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)") converted fat_remount():new_rdonly from a bool to an int. However fat_remount() depends upon the compiler's conversion of a non-zero integer into boolean `true'. Fix it by switching `new_rdonly' back into a bool. Link: http://lkml.kernel.org/r/87mv3d5x51.fsf@mail.parknet.co.jp Fixes: bc98a42c1f7d0f8 ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)") Signed-off-by: OGAWA Hirofumi Cc: Joe Perches Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 016c46b5e44c9..20a0a89eaca58 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -779,7 +779,7 @@ static void __exit fat_destroy_inodecache(void) static int fat_remount(struct super_block *sb, int *flags, char *data) { - int new_rdonly; + bool new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME); From 43694d4bf843ddd34519e8e9de983deefeada699 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 29 Nov 2017 16:11:23 -0800 Subject: [PATCH 356/406] autofs: revert "autofs: take more care to not update last_used on path walk" While commit 092a53452bb7 ("autofs: take more care to not update last_used on path walk") helped (partially) resolve a problem where automounts were not expiring due to aggressive accesses from user space it has a side effect for very large environments. This change helps with the expire problem by making the expire more aggressive but, for very large environments, that means more mount requests from clients. When there are a lot of clients that can mean fairly significant server load increases. It turns out I put the last_used in this position to solve this very problem and failed to update my own thinking of the autofs expire policy. So the patch being reverted introduces a regression which should be fixed. Link: http://lkml.kernel.org/r/151174729420.6162.1832622523537052460.stgit@pluto.themaw.net Fixes: 092a53452b ("autofs: take more care to not update last_used on path walk") Signed-off-by: Ian Kent Reviewed-by: NeilBrown Cc: Al Viro Cc: [4.11+] Cc: Colin Walters Cc: David Howells Cc: Ondrej Holy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d79ced9258614..82e8f6edfb48d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk) pr_debug("waiting for mount name=%pd\n", path->dentry); status = autofs4_wait(sbi, path, NFY_MOUNT); pr_debug("mount wait done status=%d\n", status); - ino->last_used = jiffies; } + ino->last_used = jiffies; return status; } @@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) */ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { struct dentry *parent = dentry->d_parent; + struct autofs_info *ino; struct dentry *new; new = d_lookup(parent, &dentry->d_name); if (!new) return NULL; - if (new == dentry) - dput(new); - else { - struct autofs_info *ino; - - ino = autofs4_dentry_ino(new); - ino->last_used = jiffies; - dput(path->dentry); - path->dentry = new; - } + ino = autofs4_dentry_ino(new); + ino->last_used = jiffies; + dput(path->dentry); + path->dentry = new; } return path->dentry; } From 5d38f049cee1e1c4a7ac55aa79d37d01ddcc3860 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 29 Nov 2017 16:11:26 -0800 Subject: [PATCH 357/406] autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored" Commit 42f461482178 ("autofs: fix AT_NO_AUTOMOUNT not being honored") allowed the fstatat(2) system call to properly honor the AT_NO_AUTOMOUNT flag but introduced a semantic change. In order to honor AT_NO_AUTOMOUNT a semantic change was made to the negative dentry case for stat family system calls in follow_automount(). This changed the unconditional triggering of an automount in this case to no longer be done and an error returned instead. This has caused more problems than I expected so reverting the change is needed. In a discussion with Neil Brown it was concluded that the automount(8) daemon can implement this change without kernel modifications. So that will be done instead and the autofs module documentation updated with a description of the problem and what needs to be done by module users for this specific case. Link: http://lkml.kernel.org/r/151174730120.6162.3848002191530283984.stgit@pluto.themaw.net Fixes: 42f4614821 ("autofs: fix AT_NO_AUTOMOUNT not being honored") Signed-off-by: Ian Kent Cc: Neil Brown Cc: Al Viro Cc: David Howells Cc: Colin Walters Cc: Ondrej Holy Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 15 +++------------ include/linux/fs.h | 3 ++- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f0c7a7b9b6ca7..9cc91fb7f1565 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd, * of the daemon to instantiate them before they can be used. */ if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE | - LOOKUP_AUTOMOUNT))) { - /* Positive dentry that isn't meant to trigger an - * automount, EISDIR will allow it to be used, - * otherwise there's no mount here "now" so return - * ENOENT. - */ - if (path->dentry->d_inode) - return -EISDIR; - else - return -ENOENT; - } + LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && + path->dentry->d_inode) + return -EISDIR; if (path->dentry->d_sb->s_user_ns != &init_user_ns) return -EACCES; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dc498d16cc14..511fbaabf6248 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat) static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { From f4f0a3d85b50a65a348e2b8635041d6b30f01deb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 29 Nov 2017 16:11:30 -0800 Subject: [PATCH 358/406] mm/hugetlb: fix NULL-pointer dereference on 5-level paging machine I made a mistake during converting hugetlb code to 5-level paging: in huge_pte_alloc() we have to use p4d_alloc(), not p4d_offset(). Otherwise it leads to crash -- NULL-pointer dereference in pud_alloc() if p4d table is not yet allocated. It only can happen in 5-level paging mode. In 4-level paging mode p4d_offset() always returns pgd, so we are fine. Link: http://lkml.kernel.org/r/20171122121921.64822-1-kirill.shutemov@linux.intel.com Fixes: c2febafc6773 ("mm: convert generic code to 5-level paging") Signed-off-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 698e8fb340318..9a334f5fb7308 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4635,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - p4d = p4d_offset(pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; pud = pud_alloc(mm, p4d, addr); if (pud) { if (sz == PUD_SIZE) { From 72639e6df4128dfde8fee7638a35be7db8114000 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 29 Nov 2017 16:11:33 -0800 Subject: [PATCH 359/406] fs/hugetlbfs/inode.c: change put_page/unlock_page order in hugetlbfs_fallocate() hugetlfs_fallocate() currently performs put_page() before unlock_page(). This scenario opens a small time window, from the time the page is added to the page cache, until it is unlocked, in which the page might be removed from the page-cache by another core. If the page is removed during this time windows, it might cause a memory corruption, as the wrong page will be unlocked. It is arguable whether this scenario can happen in a real system, and there are several mitigating factors. The issue was found by code inspection (actually grep), and not by actually triggering the flow. Yet, since putting the page before unlocking is incorrect it should be fixed, if only to prevent future breakage or someone copy-pasting this code. Mike said: "I am of the opinion that this does not need to be sent to stable. Although the ordering is current code is incorrect, there is no way for this to be a problem with current locking. In addition, I verified that the perhaps bigger issue with sys_fadvise64(POSIX_FADV_DONTNEED) for hugetlbfs and other filesystems is addressed in 3a77d214807c ("mm: fadvise: avoid fadvise for fs without backing device")" Link: http://lkml.kernel.org/r/20170826191124.51642-1-namit@vmware.com Fixes: 70c3547e36f5c ("hugetlbfs: add hugetlbfs_fallocate()") Signed-off-by: Nadav Amit Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Cc: Eric Biggers Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1e76730aac0de..8a85f3f534465 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, mutex_unlock(&hugetlb_fault_mutex_table[hash]); /* - * page_put due to reference from alloc_huge_page() * unlock_page because locked by add_to_page_cache() + * page_put due to reference from alloc_huge_page() */ - put_page(page); unlock_page(page); + put_page(page); } if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) From 1b6fba458c0a2e8513071330972c4c587b7d28cc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 21 Nov 2017 09:17:43 +0100 Subject: [PATCH 360/406] drm/bridge: adv7511/33: Fix adv7511_cec_init() failure handling If the device tree for a board did not specify a cec clock, then adv7511_cec_init would return an error, which would cause adv7511_probe() to fail and thus there is no HDMI output. There is no need to have adv7511_probe() fail if the CEC initialization fails, so just change adv7511_cec_init() to a void function. In addition, adv7511_cec_init() should just return silently if the cec clock isn't found and show a message for any other errors. An otherwise correct cleanup patch from Dan Carpenter turned this broken failure handling into a kernel Oops, so bisection points to commit 7af35b0addbc ("drm/kirin: Checking for IS_ERR() instead of NULL") rather than 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support"). Based on earlier patches from Arnd and John. Reported-by: Naresh Kamboju Cc: Xinliang Liu Cc: Dan Carpenter Cc: Sean Paul Cc: Archit Taneja Cc: John Stultz Link: https://bugs.linaro.org/show_bug.cgi?id=3345 Link: https://lkft.validation.linaro.org/scheduler/job/48017#L3551 Fixes: 7af35b0addbc ("drm/kirin: Checking for IS_ERR() instead of NULL") Fixes: 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support") Signed-off-by: Hans Verkuil Tested-by: Hans Verkuil Reviewed-by: Laurent Pinchart Tested-by: John Stultz Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/9097b2a4-b6b9-5fca-e039-0a17694b1143@xs4all.nl --- drivers/gpu/drm/bridge/adv7511/adv7511.h | 13 ++++++-- drivers/gpu/drm/bridge/adv7511/adv7511_cec.c | 32 ++++++++++++++------ drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 17 +++-------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index b4efcbabf7f72..d034b2cb5eee3 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -372,9 +372,18 @@ struct adv7511 { }; #ifdef CONFIG_DRM_I2C_ADV7511_CEC -int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, - unsigned int offset); +int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511); void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1); +#else +static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) +{ + unsigned int offset = adv7511->type == ADV7533 ? + ADV7533_REG_CEC_OFFSET : 0; + + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + ADV7511_CEC_CTRL_POWER_DOWN); + return 0; +} #endif #ifdef CONFIG_DRM_I2C_ADV7533 diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index b33d730e4d736..a20a45c0b353f 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -300,18 +300,21 @@ static int adv7511_cec_parse_dt(struct device *dev, struct adv7511 *adv7511) return 0; } -int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, - unsigned int offset) +int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) { + unsigned int offset = adv7511->type == ADV7533 ? + ADV7533_REG_CEC_OFFSET : 0; int ret = adv7511_cec_parse_dt(dev, adv7511); if (ret) - return ret; + goto err_cec_parse_dt; adv7511->cec_adap = cec_allocate_adapter(&adv7511_cec_adap_ops, adv7511, dev_name(dev), CEC_CAP_DEFAULTS, ADV7511_MAX_ADDRS); - if (IS_ERR(adv7511->cec_adap)) - return PTR_ERR(adv7511->cec_adap); + if (IS_ERR(adv7511->cec_adap)) { + ret = PTR_ERR(adv7511->cec_adap); + goto err_cec_alloc; + } regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, 0); /* cec soft reset */ @@ -329,9 +332,18 @@ int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, ((adv7511->cec_clk_freq / 750000) - 1) << 2); ret = cec_register_adapter(adv7511->cec_adap, dev); - if (ret) { - cec_delete_adapter(adv7511->cec_adap); - adv7511->cec_adap = NULL; - } - return ret; + if (ret) + goto err_cec_register; + return 0; + +err_cec_register: + cec_delete_adapter(adv7511->cec_adap); + adv7511->cec_adap = NULL; +err_cec_alloc: + dev_info(dev, "Initializing CEC failed with error %d, disabling CEC\n", + ret); +err_cec_parse_dt: + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + ADV7511_CEC_CTRL_POWER_DOWN); + return ret == -EPROBE_DEFER ? ret : 0; } diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 0e14f1572d059..efa29db5fc2b7 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1084,7 +1084,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) struct device *dev = &i2c->dev; unsigned int main_i2c_addr = i2c->addr << 1; unsigned int edid_i2c_addr = main_i2c_addr + 4; - unsigned int offset; unsigned int val; int ret; @@ -1192,24 +1191,16 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) if (adv7511->type == ADV7511) adv7511_set_link_config(adv7511, &link_config); + ret = adv7511_cec_init(dev, adv7511); + if (ret) + goto err_unregister_cec; + adv7511->bridge.funcs = &adv7511_bridge_funcs; adv7511->bridge.of_node = dev->of_node; drm_bridge_add(&adv7511->bridge); adv7511_audio_init(dev, adv7511); - - offset = adv7511->type == ADV7533 ? ADV7533_REG_CEC_OFFSET : 0; - -#ifdef CONFIG_DRM_I2C_ADV7511_CEC - ret = adv7511_cec_init(dev, adv7511, offset); - if (ret) - goto err_unregister_cec; -#else - regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, - ADV7511_CEC_CTRL_POWER_DOWN); -#endif - return 0; err_unregister_cec: From ebe32c3e282a62974b190b9d514864fc0d56716e Mon Sep 17 00:00:00 2001 From: Pierre-Hugues Husson Date: Sat, 25 Nov 2017 21:18:44 +0100 Subject: [PATCH 361/406] drm/bridge: synopsys/dw-hdmi: Enable cec clock Support the "cec" optional clock. The documentation already mentions "cec" optional clock and it is used by several boards, but currently the driver doesn't enable it, thus preventing cec from working on those boards. And even worse: a /dev/cecX device will appear for those boards, but it won't be functioning without configuring this clock. Changes: v4: - Change commit message to stress the importance of this patch v3: - Drop useless braces v2: - Separate ENOENT errors from others - Propagate other errors (especially -EPROBE_DEFER) Signed-off-by: Pierre-Hugues Husson Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171125201844.11353-1-phh@phh.me --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index bf14214fa4640..b72259bf6e2fb 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -138,6 +138,7 @@ struct dw_hdmi { struct device *dev; struct clk *isfr_clk; struct clk *iahb_clk; + struct clk *cec_clk; struct dw_hdmi_i2c *i2c; struct hdmi_data_info hdmi_data; @@ -2382,6 +2383,26 @@ __dw_hdmi_probe(struct platform_device *pdev, goto err_isfr; } + hdmi->cec_clk = devm_clk_get(hdmi->dev, "cec"); + if (PTR_ERR(hdmi->cec_clk) == -ENOENT) { + hdmi->cec_clk = NULL; + } else if (IS_ERR(hdmi->cec_clk)) { + ret = PTR_ERR(hdmi->cec_clk); + if (ret != -EPROBE_DEFER) + dev_err(hdmi->dev, "Cannot get HDMI cec clock: %d\n", + ret); + + hdmi->cec_clk = NULL; + goto err_iahb; + } else { + ret = clk_prepare_enable(hdmi->cec_clk); + if (ret) { + dev_err(hdmi->dev, "Cannot enable HDMI cec clock: %d\n", + ret); + goto err_iahb; + } + } + /* Product and revision IDs */ hdmi->version = (hdmi_readb(hdmi, HDMI_DESIGN_ID) << 8) | (hdmi_readb(hdmi, HDMI_REVISION_ID) << 0); @@ -2518,6 +2539,8 @@ __dw_hdmi_probe(struct platform_device *pdev, cec_notifier_put(hdmi->cec_notifier); clk_disable_unprepare(hdmi->iahb_clk); + if (hdmi->cec_clk) + clk_disable_unprepare(hdmi->cec_clk); err_isfr: clk_disable_unprepare(hdmi->isfr_clk); err_res: @@ -2541,6 +2564,8 @@ static void __dw_hdmi_remove(struct dw_hdmi *hdmi) clk_disable_unprepare(hdmi->iahb_clk); clk_disable_unprepare(hdmi->isfr_clk); + if (hdmi->cec_clk) + clk_disable_unprepare(hdmi->cec_clk); if (hdmi->i2c) i2c_del_adapter(&hdmi->i2c->adap); From dbb58bfd9ae6c885b2ca001a9a5ab8b881fb4ba9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 14 Nov 2017 11:16:47 -0800 Subject: [PATCH 362/406] drm/bridge: Fix lvds-encoder since the panel_bridge rework. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The panel_bridge bridge attaches to the panel's OF node, not the lvds-encoder's node. Put in a little no-op bridge of our own so that our consumers can still find a bridge where they expect. This also fixes an unintended unregistration and leak of the panel-bridge on module remove. Signed-off-by: Eric Anholt Fixes: 13dfc0540a57 ("drm/bridge: Refactor out the panel wrapper from the lvds-encoder bri dge.") Tested-by: Lothar Waßmann Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171114191647.22207-1-eric@anholt.net --- drivers/gpu/drm/bridge/lvds-encoder.c | 48 +++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/bridge/lvds-encoder.c b/drivers/gpu/drm/bridge/lvds-encoder.c index 0903ba574f61c..75b0d3f6e4de9 100644 --- a/drivers/gpu/drm/bridge/lvds-encoder.c +++ b/drivers/gpu/drm/bridge/lvds-encoder.c @@ -13,13 +13,37 @@ #include +struct lvds_encoder { + struct drm_bridge bridge; + struct drm_bridge *panel_bridge; +}; + +static int lvds_encoder_attach(struct drm_bridge *bridge) +{ + struct lvds_encoder *lvds_encoder = container_of(bridge, + struct lvds_encoder, + bridge); + + return drm_bridge_attach(bridge->encoder, lvds_encoder->panel_bridge, + bridge); +} + +static struct drm_bridge_funcs funcs = { + .attach = lvds_encoder_attach, +}; + static int lvds_encoder_probe(struct platform_device *pdev) { struct device_node *port; struct device_node *endpoint; struct device_node *panel_node; struct drm_panel *panel; - struct drm_bridge *bridge; + struct lvds_encoder *lvds_encoder; + + lvds_encoder = devm_kzalloc(&pdev->dev, sizeof(*lvds_encoder), + GFP_KERNEL); + if (!lvds_encoder) + return -ENOMEM; /* Locate the panel DT node. */ port = of_graph_get_port_by_id(pdev->dev.of_node, 1); @@ -49,20 +73,30 @@ static int lvds_encoder_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_LVDS); - if (IS_ERR(bridge)) - return PTR_ERR(bridge); + lvds_encoder->panel_bridge = + devm_drm_panel_bridge_add(&pdev->dev, + panel, DRM_MODE_CONNECTOR_LVDS); + if (IS_ERR(lvds_encoder->panel_bridge)) + return PTR_ERR(lvds_encoder->panel_bridge); + + /* The panel_bridge bridge is attached to the panel's of_node, + * but we need a bridge attached to our of_node for our user + * to look up. + */ + lvds_encoder->bridge.of_node = pdev->dev.of_node; + lvds_encoder->bridge.funcs = &funcs; + drm_bridge_add(&lvds_encoder->bridge); - platform_set_drvdata(pdev, bridge); + platform_set_drvdata(pdev, lvds_encoder); return 0; } static int lvds_encoder_remove(struct platform_device *pdev) { - struct drm_bridge *bridge = platform_get_drvdata(pdev); + struct lvds_encoder *lvds_encoder = platform_get_drvdata(pdev); - drm_bridge_remove(bridge); + drm_bridge_remove(&lvds_encoder->bridge); return 0; } From cffd2b16c01c3431a7a7dd62e722af33490fc436 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:19 +0300 Subject: [PATCH 363/406] drm/bridge: tc358767: do no fail on hi-res displays Do not fail data rates higher than 2.7 and more than 2 lanes. Try to fall back to 2.7Gbps and 2 lanes. Acked-by: Philipp Zabel Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-2-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 8571cfd877c52..e1996dbfb0a18 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -603,8 +603,15 @@ static int tc_get_display_props(struct tc_data *tc) ret = drm_dp_link_probe(&tc->aux, &tc->link.base); if (ret < 0) goto err_dpcd_read; - if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000)) - goto err_dpcd_inval; + if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) { + dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n"); + tc->link.base.rate = 270000; + } + + if (tc->link.base.num_lanes > 2) { + dev_dbg(tc->dev, "Falling to 2 lanes\n"); + tc->link.base.num_lanes = 2; + } ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp); if (ret < 0) @@ -637,9 +644,6 @@ static int tc_get_display_props(struct tc_data *tc) err_dpcd_read: dev_err(tc->dev, "failed to read DPCD: %d\n", ret); return ret; -err_dpcd_inval: - dev_err(tc->dev, "invalid DPCD\n"); - return -EINVAL; } static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) From 99fc8e963a4c0203dba26a77cf737db6081bca14 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:20 +0300 Subject: [PATCH 364/406] drm/bridge: tc358767: filter out too high modes Pixel clock limitation for DPI is 154 MHz. Do not accept modes with higher pixel clock rate. Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-3-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e1996dbfb0a18..e571b5e29ae78 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1103,7 +1103,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - /* Accept any mode */ + /* DPI interface clock limitation: upto 154 MHz */ + if (mode->clock > 154000) + return MODE_CLOCK_HIGH; + return MODE_OK; } From f3b8adbe1911f66fd3cab1aaa74f0f66b7ceda25 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:21 +0300 Subject: [PATCH 365/406] drm/bridge: tc358767: fix DP0_MISC register set Remove shift from TU_SIZE_RECOMMENDED define as it used to calculate max_tu_symbols. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-4-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e571b5e29ae78..7334cb2121a38 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -97,7 +97,7 @@ #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 #define DP0_MISC 0x0658 -#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */ +#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) #define BPC_8 (1 << 5) @@ -716,7 +716,8 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) * Must be less than tu_size. */ max_tu_symbol = TU_SIZE_RECOMMENDED - 1; - tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8); + tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | + BPC_8); return 0; err: From 66d1c3b94d5d59e4325e61a78d520f92c043d645 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:22 +0300 Subject: [PATCH 366/406] drm/bridge: tc358767: fix timing calculations Fields in HTIM01 and HTIM02 regs should be even. Recomended thresh_dly value is max_tu_symbol. Remove set of VPCTRL0.VSDELAY as it is related to DSI input interface. Currently driver supports only DPI. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-5-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 7334cb2121a38..b916346b933a5 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -659,6 +659,14 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; + /* + * Recommended maximum number of symbols transferred in a transfer unit: + * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, + * (output active video bandwidth in bytes)) + * Must be less than tu_size. + */ + max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); dev_dbg(tc->dev, "H margin %d,%d sync %d\n", @@ -668,13 +676,18 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal); - /* LCD Ctl Frame Size */ - tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ | + /* + * LCD Ctl Frame Size + * datasheet is not clear of vsdelay in case of DPI + * assume we do not need any delay when DPI is a source of + * sync signals + */ + tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ | OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED); - tc_write(HTIM01, (left_margin << 16) | /* H back porch */ - (hsync_len << 0)); /* Hsync */ - tc_write(HTIM02, (right_margin << 16) | /* H front porch */ - (mode->hdisplay << 0)); /* width */ + tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */ + (ALIGN(hsync_len, 2) << 0)); /* Hsync */ + tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */ + (ALIGN(mode->hdisplay, 2) << 0)); /* width */ tc_write(VTIM01, (upper_margin << 16) | /* V back porch */ (vsync_len << 0)); /* Vsync */ tc_write(VTIM02, (lower_margin << 16) | /* V front porch */ @@ -693,7 +706,7 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) /* DP Main Stream Attributes */ vid_sync_dly = hsync_len + left_margin + mode->hdisplay; tc_write(DP0_VIDSYNCDELAY, - (0x003e << 16) | /* thresh_dly */ + (max_tu_symbol << 16) | /* thresh_dly */ (vid_sync_dly << 0)); tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal)); @@ -709,13 +722,6 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); - /* - * Recommended maximum number of symbols transferred in a transfer unit: - * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, - * (output active video bandwidth in bytes)) - * Must be less than tu_size. - */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | BPC_8); From 9217c1abbc145a77d65c476cf2004a3df02104c7 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:23 +0300 Subject: [PATCH 367/406] drm/bridge: tc358767: fix AUXDATAn registers access First four bytes should go to DP0_AUXWDATA0. Due to bug if len > 4 first four bytes was writen to DP0_AUXWDATA1 and all data get shifted by 4 bytes. Fix it. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-6-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index b916346b933a5..24f495bf0567b 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -318,7 +318,7 @@ static ssize_t tc_aux_transfer(struct drm_dp_aux *aux, tmp = (tmp << 8) | buf[i]; i++; if (((i % 4) == 0) || (i == size)) { - tc_write(DP0_AUXWDATA(i >> 2), tmp); + tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp); tmp = 0; } } From 4dbd6c03fbf88299c573d676838896c6e06aade2 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:24 +0300 Subject: [PATCH 368/406] drm/bridge: tc358767: fix 1-lane behavior Use drm_dp_channel_eq_ok helper Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-7-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 24f495bf0567b..8636e7eeb7315 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -819,8 +819,6 @@ static int tc_main_link_setup(struct tc_data *tc) unsigned int rate; u32 dp_phy_ctrl; int timeout; - bool aligned; - bool ready; u32 value; int ret; u8 tmp[8]; @@ -965,16 +963,15 @@ static int tc_main_link_setup(struct tc_data *tc) ret = drm_dp_dpcd_read_link_status(aux, tmp + 2); if (ret < 0) goto err_dpcd_read; - ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */ - DP_CHANNEL_EQ_BITS)); /* Lane0 */ - aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE; - } while ((--timeout) && !(ready && aligned)); + } while ((--timeout) && + !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes))); if (timeout == 0) { /* Read DPCD 0x200-0x201 */ ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2); if (ret < 0) goto err_dpcd_read; + dev_err(dev, "channel(s) EQ not ok\n"); dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]); dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n", tmp[1]); @@ -985,10 +982,6 @@ static int tc_main_link_setup(struct tc_data *tc) dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n", tmp[6]); - if (!ready) - dev_err(dev, "Lane0/1 not ready\n"); - if (!aligned) - dev_err(dev, "Lane0/1 not aligned\n"); return -EAGAIN; } From fd50fbb6bfdea5daa4ae4dd7b7082485ac44bdf5 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 27 Nov 2017 11:12:33 +0200 Subject: [PATCH 369/406] drm/i915: Disable THP until we have a GPU read BW W/A We seem to be missing some W/A for 2M pages and are getting a hit on raw GPU read bandwidths (even 30%) even though the GPU write bandwidths improve (even 10%). For now, disable THP, which is our only practical source of 2M pages until we have a W/A for the issue. v2: - Be explicit that we talk about GPU bandwidths (Eero) - s/deny/never/ because that's why (Chris) Reported-by: Valtteri Rantala Fixes: b901bb89324a ("drm/i915/gemfs: enable THP") Signed-off-by: Joonas Lahtinen Cc: Matthew Auld Cc: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Valtteri Rantala Cc: Eero Tamminen Reviewed-by: Chris Wilson Reviewed-by: Matthew Auld Tested-by: Valtteri Rantala Link: https://patchwork.freedesktop.org/patch/msgid/20171127091233.7001-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 9987da4b5dcfc8b94b702d4bb94b30955eb73c75) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gemfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c index e2993857df37b..888b7d3f04c30 100644 --- a/drivers/gpu/drm/i915/i915_gemfs.c +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -52,7 +52,8 @@ int i915_gemfs_init(struct drm_i915_private *i915) if (has_transparent_hugepage()) { struct super_block *sb = gemfs->mnt_sb; - char options[] = "huge=within_size"; + /* FIXME: Disabled until we get W/A for read BW issue. */ + char options[] = "huge=never"; int flags = 0; int err; From 8677b1ac2db021ab30bb1fa34f1e56ebe0051ec3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Sep 2017 08:01:03 +0200 Subject: [PATCH 370/406] drm/omap: Fix error handling path in 'omap_dmm_probe()' If we don't find a matching device node, we must free the memory allocated in 'omap_dmm' a few lines above. Fixes: 7cb0d6c17b96 ("drm/omap: fix TILER on OMAP5") Signed-off-by: Christophe JAILLET Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 1dd3dafc59afd..c60a85e82c6d8 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -638,7 +638,8 @@ static int omap_dmm_probe(struct platform_device *dev) match = of_match_node(dmm_of_match, dev->dev.of_node); if (!match) { dev_err(&dev->dev, "failed to find matching device node\n"); - return -ENODEV; + ret = -ENODEV; + goto fail; } omap_dmm->plat_data = match->data; From 499ec0ed5eb2f6a7fcaab2dd66ffc5993484bda9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Oct 2017 14:06:41 +0200 Subject: [PATCH 371/406] drm/omap: displays: panel-dpi: add backlight dependency The new backlight code causes a link failure when backlight support itself is disabled: drivers/gpu/drm/omapdrm/displays/panel-dpi.o: In function `panel_dpi_probe_of': panel-dpi.c:(.text+0x35c): undefined reference to `of_find_backlight_by_node' This adds a Kconfig dependency like we have for the other OMAP display targets. Fixes: 39135a305a0f ("drm/omap: displays: panel-dpi: Support for handling backlight devices") Signed-off-by: Arnd Bergmann Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/displays/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/omapdrm/displays/Kconfig b/drivers/gpu/drm/omapdrm/displays/Kconfig index c226da145fb3c..a349cb61961e0 100644 --- a/drivers/gpu/drm/omapdrm/displays/Kconfig +++ b/drivers/gpu/drm/omapdrm/displays/Kconfig @@ -35,6 +35,7 @@ config DRM_OMAP_CONNECTOR_ANALOG_TV config DRM_OMAP_PANEL_DPI tristate "Generic DPI panel" + depends on BACKLIGHT_CLASS_DEVICE help Driver for generic DPI panels. From 23970e150a0a49f9a966c46e5d22fed06226098f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 20 Nov 2017 11:51:40 +0200 Subject: [PATCH 372/406] omapdrm: hdmi4: Correct the SoC revision matching I believe the intention of the commit 2c9fc9bf45f8 ("drm: omapdrm: Move FEAT_HDMI_* features to hdmi4 driver") was to identify omap4430 ES1.x, omap4430 ES2.x and other OMAP4 revisions, like omap4460. By using family=OMAP4 in the match the code will treat omap4460 ES1.x in a same way as it would treat omap4430 ES1.x This breaks HDMI audio on OMAP4460 devices (PandaES for example). Correct the match rule so we are not going to get false positive match. Fixes: 2c9fc9bf45f8 ("drm: omapdrm: Move FEAT_HDMI_* features to hdmi4 driver") Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Peter Ujfalusi Reviewed-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4_core.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c index 62e451162d96f..b06f9956e7332 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c @@ -886,25 +886,36 @@ struct hdmi4_features { bool audio_use_mclk; }; -static const struct hdmi4_features hdmi4_es1_features = { +static const struct hdmi4_features hdmi4430_es1_features = { .cts_swmode = false, .audio_use_mclk = false, }; -static const struct hdmi4_features hdmi4_es2_features = { +static const struct hdmi4_features hdmi4430_es2_features = { .cts_swmode = true, .audio_use_mclk = false, }; -static const struct hdmi4_features hdmi4_es3_features = { +static const struct hdmi4_features hdmi4_features = { .cts_swmode = true, .audio_use_mclk = true, }; static const struct soc_device_attribute hdmi4_soc_devices[] = { - { .family = "OMAP4", .revision = "ES1.?", .data = &hdmi4_es1_features }, - { .family = "OMAP4", .revision = "ES2.?", .data = &hdmi4_es2_features }, - { .family = "OMAP4", .data = &hdmi4_es3_features }, + { + .machine = "OMAP4430", + .revision = "ES1.?", + .data = &hdmi4430_es1_features, + }, + { + .machine = "OMAP4430", + .revision = "ES2.?", + .data = &hdmi4430_es2_features, + }, + { + .family = "OMAP4", + .data = &hdmi4_features, + }, { /* sentinel */ } }; From bf25dac38f71d392a31ec074f55cbc941f1eaf1d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 16 Nov 2017 09:50:19 +0100 Subject: [PATCH 373/406] drm: omapdrm: Fix DPI on platforms using the DSI VDDS Commit d178e034d565 ("drm: omapdrm: Move FEAT_DPI_USES_VDDS_DSI feature to dpi code") replaced usage of platform data version with SoC matching to configure DPI VDDS. The SoC match entries were incorrect, they should have matched on the machine name instead of the SoC family. Fix it. The result was observed on OpenPandora with OMAP3530 where the panel only had the Blue channel and Red&Green were missing. It was not observed on GTA04 with DM3730. Fixes: d178e034d565 ("drm: omapdrm: Move FEAT_DPI_USES_VDDS_DSI feature to dpi code") Signed-off-by: Laurent Pinchart Reported-by: H. Nikolaus Schaller Tested-by: H. Nikolaus Schaller Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/dpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c index daf286fc8a408..ca1e3b489540f 100644 --- a/drivers/gpu/drm/omapdrm/dss/dpi.c +++ b/drivers/gpu/drm/omapdrm/dss/dpi.c @@ -566,8 +566,8 @@ static int dpi_verify_pll(struct dss_pll *pll) } static const struct soc_device_attribute dpi_soc_devices[] = { - { .family = "OMAP3[456]*" }, - { .family = "[AD]M37*" }, + { .machine = "OMAP3[456]*" }, + { .machine = "[AD]M37*" }, { /* sentinel */ } }; From bc2aba905211a8fc6da367e04595d1624f0fa156 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2017 09:27:05 +0300 Subject: [PATCH 374/406] omapdrm: hdmi4_cec: signedness bug in hdmi4_cec_init() "ret" needs to be signed for the error handling to work. Fixes: 8d7f934df8d8 ("omapdrm: hdmi4_cec: add OMAP4 HDMI CEC support") Signed-off-by: Dan Carpenter Reviewed-by: Sebastian Reichel Acked-by: Hans Verkuil Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index d86873f2abe6a..e626eddf24d5e 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -352,7 +352,7 @@ int hdmi4_cec_init(struct platform_device *pdev, struct hdmi_core_data *core, { const u32 caps = CEC_CAP_TRANSMIT | CEC_CAP_LOG_ADDRS | CEC_CAP_PASSTHROUGH | CEC_CAP_RC; - unsigned int ret; + int ret; core->adap = cec_allocate_adapter(&hdmi_cec_adap_ops, core, "omap4", caps, CEC_MAX_LOG_ADDRS); From a152992062aa3803eeabfda84b5b844721ddf6ed Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Nov 2017 14:31:46 +0100 Subject: [PATCH 375/406] drm/imx: always call wait_for_flip_done in commit_tail drm_atomic_helper_wait_for_vblanks will go away in the future. The new drm_atomic_helper_setup_commit in 4.15 expects that blocking commits have completed flipping before the commit_tail returns. This must be ensured by calling wait_for_vblanks or wait_for_flip_done, where flip_done might do a less agressive wait, which is fine for imx-drm. Fixes: 080de2e5be2d (drm/atomic: Check for busy planes/connectors before setting the commit) Signed-off-by: Lucas Stach Reviewed-by: Daniel Vetter Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 93c7e3f9b4a88..17d2f3a1c562b 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -133,9 +133,16 @@ static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state) plane_disabling = true; } - if (plane_disabling) { - drm_atomic_helper_wait_for_vblanks(dev, state); + /* + * The flip done wait is only strictly required by imx-drm if a deferred + * plane disable is in-flight. As the core requires blocking commits + * to wait for the flip it is done here unconditionally. This keeps the + * workitem around a bit longer than required for the majority of + * non-blocking commits, but we accept that for the sake of simplicity. + */ + drm_atomic_helper_wait_for_flip_done(dev, state); + if (plane_disabling) { for_each_old_plane_in_state(state, plane, old_plane_state, i) ipu_plane_disable_deferred(plane); From 22a6c83777ac7c17d6c63891beeeac24cf5da450 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:17 -0800 Subject: [PATCH 376/406] xfs: ubsan fixes Fix some complaints from the UBSAN about signed integer addition overflows. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a3eeaba156c5a..b0cccf8a81a89 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1265,7 +1265,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); From 2d5f4b5bebccfe983715ebc9255151e611234643 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:22 -0800 Subject: [PATCH 377/406] xfs: remove unused parameter from xfs_writepage_map The first thing that xfs_writepage_map does is clobber the offset parameter. Since we never use the passed-in value, turn the parameter into a local variable. This gets rid of an UBSAN warning in generic/466. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index b0cccf8a81a89..21e2d70884e18 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -896,13 +896,13 @@ xfs_writepage_map( struct writeback_control *wbc, struct inode *inode, struct page *page, - loff_t offset, - uint64_t end_offset) + uint64_t end_offset) { LIST_HEAD(submit_list); struct xfs_ioend *ioend, *next; struct buffer_head *bh, *head; ssize_t len = i_blocksize(inode); + uint64_t offset; int error = 0; int count = 0; int uptodate = 1; @@ -1146,7 +1146,7 @@ xfs_do_writepage( end_offset = offset; } - return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); + return xfs_writepage_map(wpc, wbc, inode, page, end_offset); redirty: redirty_page_for_writepage(wbc, page); From 3b42d385753c22b29d259ccb9d4c3f419e583b30 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 21:40:19 -0800 Subject: [PATCH 378/406] xfs: scrub inode mode properly Since we've used up all the bits in i_mode, the existing mode check doesn't actually do anything useful. However, we've not used all the bit values in the format portion of i_mode, so we /do/ need to test that for bad values. Fixes: 80e4e1268 ("xfs: scrub inodes") Fixes-coverity-id: 1423992 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 637b7a892313d..f120fb20452f4 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -318,8 +318,20 @@ xfs_scrub_dinode( /* di_mode */ mode = be16_to_cpu(dip->di_mode); - if (mode & ~(S_IALLUGO | S_IFMT)) + switch (mode & S_IFMT) { + case S_IFLNK: + case S_IFREG: + case S_IFDIR: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + /* mode is recognized */ + break; + default: xfs_scrub_ino_set_corrupt(sc, ino, bp); + break; + } /* v1/v2 fields */ switch (dip->di_version) { From 373b0589dc8d58bc09c9a28d03611ae4fb216057 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 28 Nov 2017 08:54:10 -0800 Subject: [PATCH 379/406] xfs: Properly retry failed dquot items in case of error during buffer writeback Once the inode item writeback errors is already fixed, it's time to fix the same problem in dquot code. Although there were no reports of users hitting this bug in dquot code (at least none I've seen), the bug is there and I was already planning to fix it when the correct approach to fix the inodes part was decided. This patch aims to fix the same problem in dquot code, regarding failed buffers being unable to be resubmitted once they are flush locked. Tested with the recently test-case sent to fstests list by Hou Tao. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_dquot.c | 14 +++++++++++--- fs/xfs/xfs_dquot_item.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index d57c2db64e593..f248708c10ff7 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -970,14 +970,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064b..664dea105e76f 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* From eb5b46faa693470681ec7c28cc2436edd1571198 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 30 Nov 2017 07:21:33 -0500 Subject: [PATCH 380/406] SUNRPC: Handle ENETDOWN errors Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 5 +++++ net/sunrpc/xprtsock.c | 1 + 2 files changed, 6 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a801da812f866..e2a4184f3c5df 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1841,6 +1841,7 @@ call_bind_status(struct rpc_task *task) case -ECONNABORTED: case -ENOTCONN: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -ENOBUFS: @@ -1917,6 +1918,7 @@ call_connect_status(struct rpc_task *task) /* fall through */ case -ECONNRESET: case -ECONNABORTED: + case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: case -EADDRINUSE: @@ -2022,6 +2024,7 @@ call_transmit_status(struct rpc_task *task) */ case -ECONNREFUSED: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: @@ -2071,6 +2074,7 @@ call_bc_transmit(struct rpc_task *task) switch (task->tk_status) { case 0: /* Success */ + case -ENETDOWN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: @@ -2139,6 +2143,7 @@ call_status(struct rpc_task *task) task->tk_status = 0; switch(status) { case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c2b2d489b57ba..7fa94abfd6b2c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2439,6 +2439,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) */ case -ECONNREFUSED: case -ECONNRESET: + case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: case -EADDRINUSE: From 5ddf755e4439833847a21bd5e2dc82d686679911 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:12 -0800 Subject: [PATCH 381/406] RISC-V: use generic serial.h Fixes this from allmodconfig: drivers/tty/serial/earlycon.c:27:10: fatal error: asm/serial.h: No such file or directory Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 18158be62a2bf..970460a0b492e 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -40,6 +40,7 @@ generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h generic-y += sembuf.h +generic-y += serial.h generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h From 5e6f82b0fe7b7b4a204efeb0817fb8b0a2bc0373 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:13 -0800 Subject: [PATCH 382/406] RISC-V: use RISCV_{INT,SHORT} instead of {INT,SHORT} for asm macros INT and SHORT are used by some drivers that pull in the include files, so prefixing helps avoid namespace conflicts. Other constructs in the same file already uses this. Fixes, among others, these warnings with allmodconfig: ../sound/core/pcm_misc.c:43:0: warning: "INT" redefined #define INT __force int Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm.h | 12 ++++++------ arch/riscv/include/asm/bug.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 6cbbb6a68d76c..5ad4cb622bedf 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -58,17 +58,17 @@ #endif #if (__SIZEOF_INT__ == 4) -#define INT __ASM_STR(.word) -#define SZINT __ASM_STR(4) -#define LGINT __ASM_STR(2) +#define RISCV_INT __ASM_STR(.word) +#define RISCV_SZINT __ASM_STR(4) +#define RISCV_LGINT __ASM_STR(2) #else #error "Unexpected __SIZEOF_INT__" #endif #if (__SIZEOF_SHORT__ == 2) -#define SHORT __ASM_STR(.half) -#define SZSHORT __ASM_STR(2) -#define LGSHORT __ASM_STR(1) +#define RISCV_SHORT __ASM_STR(.half) +#define RISCV_SZSHORT __ASM_STR(2) +#define RISCV_LGSHORT __ASM_STR(1) #else #error "Unexpected __SIZEOF_SHORT__" #endif diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index c3e13764a943c..bfc7f099ab1fe 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -27,8 +27,8 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS -#define __BUG_ENTRY_ADDR INT " 1b - 2b" -#define __BUG_ENTRY_FILE INT " %0 - 2b" +#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b" +#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b" #else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" #define __BUG_ENTRY_FILE RISCV_PTR " %0" @@ -38,7 +38,7 @@ typedef u32 bug_insn_t; #define __BUG_ENTRY \ __BUG_ENTRY_ADDR "\n\t" \ __BUG_ENTRY_FILE "\n\t" \ - SHORT " %1" + RISCV_SHORT " %1" #else #define __BUG_ENTRY \ __BUG_ENTRY_ADDR From fe2726af9fdc93ecf2469b7c512fc1a8936e128c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:14 -0800 Subject: [PATCH 383/406] RISC-V: io.h: type fixes for warnings include for __iomem definition. Also, add volatile to iounmap() like other architectures have it to avoid "discarding volatile" warnings from some drivers. Finally, explicitly promote the base address for INB/OUTB functions to avoid some old legacy drivers complaining about int-to-ptr promotions. The drivers are unlikely to work but they're included in allmodconfig so the warnings are noisy. Fixes, among other warnings, these with allmodconfig: ../arch/riscv/include/asm/io.h:24:21: error: expected '=', ',', ';', 'asm' or '__attribute__' before '*' token extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); sound/pci/echoaudio/echoaudio.c: In function 'snd_echo_free': sound/pci/echoaudio/echoaudio.c:1879:10: warning: passing argument 1 of 'iounmap' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers] Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 16 +++++++++------- arch/riscv/mm/ioremap.c | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c1f32cfcc79bb..2a5c7c371e2da 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -19,6 +19,8 @@ #ifndef _ASM_RISCV_IO_H #define _ASM_RISCV_IO_H +#include + #ifdef CONFIG_MMU extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); @@ -32,7 +34,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); #define ioremap_wc(addr, size) ioremap((addr), (size)) #define ioremap_wt(addr, size) ioremap((addr), (size)) -extern void iounmap(void __iomem *addr); +extern void iounmap(volatile void __iomem *addr); #endif /* CONFIG_MMU */ @@ -266,9 +268,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar()) __io_reads_ins(ins, u8, b, __io_pbr(), __io_par()) __io_reads_ins(ins, u16, w, __io_pbr(), __io_par()) __io_reads_ins(ins, u32, l, __io_pbr(), __io_par()) -#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count) -#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count) -#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count) +#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) +#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) +#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) __io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) __io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) @@ -280,9 +282,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw()) __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) -#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count) -#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count) -#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count) +#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count) +#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count) +#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) #ifdef CONFIG_64BIT __io_reads_ins(reads, u64, q, __io_br(), __io_ar()) diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c index e99194a4077ec..70ef2724cdf61 100644 --- a/arch/riscv/mm/ioremap.c +++ b/arch/riscv/mm/ioremap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(ioremap); * * Caller must ensure there is only one unmapping for the same pointer. */ -void iounmap(void __iomem *addr) +void iounmap(volatile void __iomem *addr) { vunmap((void *)((unsigned long)addr & PAGE_MASK)); } From 83e7b8769a08987f47117f3a065de153c839a0a8 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:15 -0800 Subject: [PATCH 384/406] RISC-V: move empty_zero_page definition to C and export it Needed by some modules (exported by other architectures). Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 3 --- arch/riscv/kernel/setup.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 76af908f87c18..78f670d701339 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -152,6 +152,3 @@ END(_start) __PAGE_ALIGNED_BSS /* Empty zero page */ .balign PAGE_SIZE -ENTRY(empty_zero_page) - .fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00 -END(empty_zero_page) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index de7db114c3153..4375375ad168a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -60,6 +60,9 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; unsigned long va_pa_offset; unsigned long pfn_base; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + /* The lucky hart to first increment this variable will boot the other cores */ atomic_t hart_lottery; From 24948b7ec0f31d36dc900088b140c4f9551b6f56 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:16 -0800 Subject: [PATCH 385/406] RISC-V: Export some expected symbols for modules These are the ones needed by current allmodconfig, so add them instead of everything other architectures are exporting -- the rest can be added on demand later if needed. Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/riscv_ksyms.c | 3 +++ arch/riscv/kernel/setup.c | 2 ++ arch/riscv/lib/delay.c | 1 + 3 files changed, 6 insertions(+) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 23cc81ec9e944..5517342487489 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,4 +12,7 @@ /* * Assembly functions that may be used (directly or indirectly) by modules */ +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4375375ad168a..8fbb6749910d4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -58,7 +58,9 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; #endif /* CONFIG_CMDLINE_BOOL */ unsigned long va_pa_offset; +EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index 1cc4ac3964b4c..dce8ae24c6d33 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -84,6 +84,7 @@ void __delay(unsigned long cycles) while ((unsigned long)(get_cycles() - t0) < cycles) cpu_relax(); } +EXPORT_SYMBOL(__delay); void udelay(unsigned long usecs) { From 4bde63286a6c7c76fe05ff0e03ad253f5260b104 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:17 -0800 Subject: [PATCH 386/406] RISC-V: Provide stub of setup_profiling_timer() Fixes the following on allmodconfig build: profile.c:(.text+0x3e4): undefined reference to `setup_profiling_timer' Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f6..17014c68e9fea 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -38,6 +38,13 @@ enum ipi_message_type { IPI_MAX }; + +/* Unsupported */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + irqreturn_t handle_ipi(void) { unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; From 4a41d5dbb0bbd0c3faffb2ccd8ef1a7aeb12f978 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:18 -0800 Subject: [PATCH 387/406] RISC-V: Use define for get_cycles like other architectures Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/timex.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 3df4932d8964f..2f26989cb864b 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -18,7 +18,7 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles(void) +static inline cycles_t get_cycles_inline(void) { cycles_t n; @@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void) : "=r" (n)); return n; } +#define get_cycles get_cycles_inline #ifdef CONFIG_64BIT static inline uint64_t get_cycles64(void) From 741fc3ff3a49509a7092d6d9eb51da6bd7577278 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:20 -0800 Subject: [PATCH 388/406] RISC-V: Add missing include Fixes: include/asm-generic/mm_hooks.h:20:11: warning: 'struct vm_area_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/mm_hooks.h:19:38: warning: 'struct mm_struct' declared inside parameter list will not be visible outside of this definition or declaration Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc43..1cd5172882c2a 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -14,6 +14,7 @@ #ifndef _ASM_RISCV_MMU_CONTEXT_H #define _ASM_RISCV_MMU_CONTEXT_H +#include #include #include From 08f051eda33b51e8ee0f45f05bcfe49d0f0caf6b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Oct 2017 14:30:32 -0700 Subject: [PATCH 389/406] RISC-V: Flush I$ when making a dirty page executable The RISC-V ISA allows for instruction caches that are not coherent WRT stores, even on a single hart. As a result, we need to explicitly flush the instruction cache whenever marking a dirty page as executable in order to preserve the correct system behavior. Local instruction caches aren't that scary (our implementations actually flush the cache, but RISC-V is defined to allow higher-performance implementations to exist), but RISC-V defines no way to perform an instruction cache shootdown. When explicitly asked to do so we can shoot down remote instruction caches via an IPI, but this is a bit on the slow side. Instead of requiring an IPI to all harts whenever marking a page as executable, we simply flush the currently running harts. In order to maintain correct behavior, we additionally mark every other hart as needing a deferred instruction cache which will be taken before anything runs on it. Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 24 ++++++++++-- arch/riscv/include/asm/mmu.h | 4 ++ arch/riscv/include/asm/mmu_context.h | 44 +++++++++++++++++++++ arch/riscv/include/asm/pgtable.h | 58 +++++++++++++++------------- arch/riscv/include/asm/tlbflush.h | 2 + arch/riscv/kernel/smp.c | 48 +++++++++++++++++++++++ arch/riscv/mm/Makefile | 1 + arch/riscv/mm/cacheflush.c | 23 +++++++++++ 8 files changed, 174 insertions(+), 30 deletions(-) create mode 100644 arch/riscv/mm/cacheflush.c diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0595585013b07..5c9ed39ee2a2d 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -18,21 +18,37 @@ #undef flush_icache_range #undef flush_icache_user_range +#undef flush_dcache_page static inline void local_flush_icache_all(void) { asm volatile ("fence.i" ::: "memory"); } +#define PG_dcache_clean PG_arch_1 + +static inline void flush_dcache_page(struct page *page) +{ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} + +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range(start, end) flush_icache_all() +#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all() + #ifndef CONFIG_SMP -#define flush_icache_range(start, end) local_flush_icache_all() -#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() +#define flush_icache_all() local_flush_icache_all() +#define flush_icache_mm(mm, local) flush_icache_all() #else /* CONFIG_SMP */ -#define flush_icache_range(start, end) sbi_remote_fence_i(0) -#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) +#define flush_icache_all() sbi_remote_fence_i(0) +void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 66805cba9a27a..5df2dccdba122 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,10 @@ typedef struct { void *vdso; +#ifdef CONFIG_SMP + /* A local icache flush is needed before user execution can resume. */ + cpumask_t icache_stale_mask; +#endif } mm_context_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc43..b15b169e3d222 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,6 +20,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *task) @@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd) csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); } +/* + * When necessary, performs a deferred icache flush for the given MM context, + * on the local CPU. RISC-V has no direct mechanism for instruction cache + * shoot downs, so instead we send an IPI that informs the remote harts they + * need to flush their local instruction caches. To avoid pathologically slow + * behavior in a common case (a bunch of single-hart processes on a many-hart + * machine, ie 'make -j') we avoid the IPIs for harts that are not currently + * executing a MM context and instead schedule a deferred local instruction + * cache flush to be performed before execution resumes on each hart. This + * actually performs that local instruction cache flush, which implicitly only + * refers to the current hart. + */ +static inline void flush_icache_deferred(struct mm_struct *mm) +{ +#ifdef CONFIG_SMP + unsigned int cpu = smp_processor_id(); + cpumask_t *mask = &mm->context.icache_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + /* + * Ensure the remote hart's writes are visible to this hart. + * This pairs with a barrier in flush_icache_mm. + */ + smp_mb(); + local_flush_icache_all(); + } +#endif +} + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *task) { if (likely(prev != next)) { + /* + * Mark the current MM context as inactive, and the next as + * active. This is at least used by the icache flushing + * routines in order to determine who should + */ + unsigned int cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + set_pgdir(next->pgd); local_flush_tlb_all(); + + flush_icache_deferred(next); } } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 3399257780b2c..2cbd92ed1629c 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) #define pte_unmap(pte) ((void)(pte)) -/* - * Certain architectures need to do special things when PTEs within - * a page table are directly modified. Thus, the following hook is - * made available. - */ -static inline void set_pte(pte_t *ptep, pte_t pteval) -{ - *ptep = pteval; -} - -static inline void set_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pteval) -{ - set_pte(ptep, pteval); -} - -static inline void pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - set_pte_at(mm, addr, ptep, __pte(0)); -} - static inline int pte_present(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT); @@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte) return (pte_val(pte) == 0); } -/* static inline int pte_read(pte_t pte) */ - static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXEC; +} + static inline int pte_huge(pte_t pte) { return pte_present(pte) && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); } -/* static inline int pte_exec(pte_t pte) */ - static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; @@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) return pte_val(pte_a) == pte_val(pte_b); } +/* + * Certain architectures need to do special things when PTEs within + * a page table are directly modified. Thus, the following hook is + * made available. + */ +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +void flush_icache_pte(pte_t pte); + +static inline void set_pte_at(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pteval) +{ + if (pte_present(pteval) && pte_exec(pteval)) + flush_icache_pte(pteval); + + set_pte(ptep, pteval); +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + set_pte_at(mm, addr, ptep, __pte(0)); +} + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 5ee4ae370b5e5..77edf2826c1f8 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,6 +17,8 @@ #ifdef CONFIG_MMU +#include + /* Flush entire local TLB */ static inline void local_flush_tlb_all(void) { diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f6..1b27ade437b49 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu) { send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); } + +/* + * Performs an icache flush for the given MM context. RISC-V has no direct + * mechanism for instruction cache shoot downs, so instead we send an IPI that + * informs the remote harts they need to flush their local instruction caches. + * To avoid pathologically slow behavior in a common case (a bunch of + * single-hart processes on a many-hart machine, ie 'make -j') we avoid the + * IPIs for harts that are not currently executing a MM context and instead + * schedule a deferred local instruction cache flush to be performed before + * execution resumes on each hart. + */ +void flush_icache_mm(struct mm_struct *mm, bool local) +{ + unsigned int cpu; + cpumask_t others, *mask; + + preempt_disable(); + + /* Mark every hart's icache as needing a flush for this MM. */ + mask = &mm->context.icache_stale_mask; + cpumask_setall(mask); + /* Flush this hart's I$ now, and mark it as flushed. */ + cpu = smp_processor_id(); + cpumask_clear_cpu(cpu, mask); + local_flush_icache_all(); + + /* + * Flush the I$ of other harts concurrently executing, and mark them as + * flushed. + */ + cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); + local |= cpumask_empty(&others); + if (mm != current->active_mm || !local) + sbi_remote_fence_i(others.bits); + else { + /* + * It's assumed that at least one strongly ordered operation is + * performed on this hart between setting a hart's cpumask bit + * and scheduling this MM context on that hart. Sending an SBI + * remote message will do this, but in the case where no + * messages are sent we still need to order this hart's writes + * with flush_icache_deferred(). + */ + smp_mb(); + } + + preempt_enable(); +} diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 81f7d9ce6d881..eb22ab49b3e00 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -2,3 +2,4 @@ obj-y += init.o obj-y += fault.o obj-y += extable.o obj-y += ioremap.o +obj-y += cacheflush.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c new file mode 100644 index 0000000000000..498c0a0814fe3 --- /dev/null +++ b/arch/riscv/mm/cacheflush.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +void flush_icache_pte(pte_t pte) +{ + struct page *page = pte_page(pte); + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + flush_icache_all(); +} From 921ebd8f2c081b3cf6c3b29ef4103eef3ff26054 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Oct 2017 14:32:16 -0700 Subject: [PATCH 390/406] RISC-V: Allow userspace to flush the instruction cache Despite RISC-V having a direct 'fence.i' instruction available to userspace (which we can't trap!), that's not actually viable when running on Linux because the kernel might schedule a process on another hart. There is no way for userspace to handle this without invoking the kernel (as it doesn't know the thread->hart mappings), so we've defined a RISC-V specific system call to flush the instruction cache. This patch adds both a system call and a VDSO entry. If possible, we'd like to avoid having the system call be considered part of the user-facing ABI and instead restrict that to the VDSO entry -- both just in general to avoid having additional user-visible ABI to maintain, and because we'd prefer that users just call the VDSO entry because there might be a better way to do this in the future (ie, one that doesn't require entering the kernel). Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 6 +++++ arch/riscv/include/asm/vdso-syscalls.h | 28 ++++++++++++++++++++++ arch/riscv/include/asm/vdso.h | 4 ++++ arch/riscv/kernel/sys_riscv.c | 32 ++++++++++++++++++++++++++ arch/riscv/kernel/syscall_table.c | 2 ++ arch/riscv/kernel/vdso/Makefile | 1 + arch/riscv/kernel/vdso/flush_icache.S | 31 +++++++++++++++++++++++++ arch/riscv/kernel/vdso/vdso.lds.S | 1 + 8 files changed, 105 insertions(+) create mode 100644 arch/riscv/include/asm/vdso-syscalls.h create mode 100644 arch/riscv/kernel/vdso/flush_icache.S diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 5c9ed39ee2a2d..efd89a88d2d0e 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -52,4 +52,10 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * Bits in sys_riscv_flush_icache()'s flags argument. + */ +#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL +#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) + #endif /* _ASM_RISCV_CACHEFLUSH_H */ diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h new file mode 100644 index 0000000000000..a2ccf18949295 --- /dev/null +++ b/arch/riscv/include/asm/vdso-syscalls.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _ASM_RISCV_VDSO_SYSCALLS_H +#define _ASM_RISCV_VDSO_SYSCALLS_H + +#ifdef CONFIG_SMP + +/* These syscalls are only used by the vDSO and are not in the uapi. */ +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +#endif + +#endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 602f612575537..541544d64c33b 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -38,4 +38,8 @@ struct vdso_data { (void __user *)((unsigned long)(base) + __vdso_##name); \ }) +#ifdef CONFIG_SMP +asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); +#endif + #endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 4351be7d0533a..c6c037eabaf6c 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -16,6 +16,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -47,3 +48,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); } #endif /* !CONFIG_64BIT */ + +#ifdef CONFIG_SMP +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * sys_riscv_flush_icache() is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, + uintptr_t, flags) +{ + struct mm_struct *mm = current->mm; + bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; + + /* Check the reserved flags. */ + if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) + return -EINVAL; + + flush_icache_mm(mm, local); + + return 0; +} +#endif diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index 4e30dc5fb5935..a5bd6401f95e6 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -15,6 +15,7 @@ #include #include #include +#include #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), @@ -22,4 +23,5 @@ void *sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = sys_ni_syscall, #include +#include }; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 2dcc4f3070bc7..324568d339213 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -6,6 +6,7 @@ vdso-syms += gettimeofday vdso-syms += clock_gettime vdso-syms += clock_getres vdso-syms += getcpu +vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S new file mode 100644 index 0000000000000..b0fbad74e873e --- /dev/null +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include + + .text +/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ +ENTRY(__vdso_flush_icache) + .cfi_startproc +#ifdef CONFIG_SMP + li a7, __NR_riscv_flush_icache + ecall +#else + fence.i + li a0, 0 +#endif + ret + .cfi_endproc +ENDPROC(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index c7543c6a00f9c..cd1d47e0724ba 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -74,6 +74,7 @@ VERSION __vdso_clock_gettime; __vdso_clock_getres; __vdso_getcpu; + __vdso_flush_icache; local: *; }; } From 0e710ac6521f13c68a5c634471f40ae448c31e0a Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 21 Nov 2017 07:59:28 -0800 Subject: [PATCH 391/406] RISC-V: Clean up an unused include We used to have some cmpxchg syscalls. They're no longer there, so we no longer need the include. CC: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index c6c037eabaf6c..a2ae936a093e4 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -14,7 +14,6 @@ */ #include -#include #include #include From 68615eb01f82256c19e41967bfb3eef902f77033 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 27 Nov 2017 17:31:00 +0100 Subject: [PATCH 392/406] hwmon: (jc42) optionally try to disable the SMBUS timeout With a nxp,se97 chip on an atmel sama5d31 board, the I2C adapter driver is not always capable of avoiding the 25-35 ms timeout as specified by the SMBUS protocol. This may cause silent corruption of the last bit of any transfer, e.g. a one is read instead of a zero if the sensor chip times out. This also affects the eeprom half of the nxp-se97 chip, where this silent corruption was originally noticed. Other I2C adapters probably suffer similar issues, e.g. bit-banging comes to mind as risky... The SMBUS register in the nxp chip is not a standard Jedec register, but it is not special to the nxp chips either, at least the atmel chips have the same mechanism. Therefore, do not special case this on the manufacturer, it is opt-in via the device property anyway. Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Peter Rosin Acked-by: Rob Herring Signed-off-by: Guenter Roeck --- .../devicetree/bindings/hwmon/jc42.txt | 4 ++++ drivers/hwmon/jc42.c | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/hwmon/jc42.txt b/Documentation/devicetree/bindings/hwmon/jc42.txt index 07a250498fbb4..f569db58f64a1 100644 --- a/Documentation/devicetree/bindings/hwmon/jc42.txt +++ b/Documentation/devicetree/bindings/hwmon/jc42.txt @@ -34,6 +34,10 @@ Required properties: - reg: I2C address +Optional properties: +- smbus-timeout-disable: When set, the smbus timeout function will be disabled. + This is not supported on all chips. + Example: temp-sensor@1a { diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 5f11dc014ed61..e5234f953a6d1 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -22,6 +22,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -45,6 +46,7 @@ static const unsigned short normal_i2c[] = { #define JC42_REG_TEMP 0x05 #define JC42_REG_MANID 0x06 #define JC42_REG_DEVICEID 0x07 +#define JC42_REG_SMBUS 0x22 /* NXP and Atmel, possibly others? */ /* Status bits in temperature register */ #define JC42_ALARM_CRIT_BIT 15 @@ -75,6 +77,9 @@ static const unsigned short normal_i2c[] = { #define GT_MANID 0x1c68 /* Giantec */ #define GT_MANID2 0x132d /* Giantec, 2nd mfg ID */ +/* SMBUS register */ +#define SMBUS_STMOUT BIT(7) /* SMBus time-out, active low */ + /* Supported chips */ /* Analog Devices */ @@ -495,6 +500,22 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) data->extended = !!(cap & JC42_CAP_RANGE); + if (device_property_read_bool(dev, "smbus-timeout-disable")) { + int smbus; + + /* + * Not all chips support this register, but from a + * quick read of various datasheets no chip appears + * incompatible with the below attempt to disable + * the timeout. And the whole thing is opt-in... + */ + smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS); + if (smbus < 0) + return smbus; + i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS, + smbus | SMBUS_STMOUT); + } + config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG); if (config < 0) return config; From 1bcab12521d9b23dbaa22ac71184778dcc43e2f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:40:43 +0000 Subject: [PATCH 393/406] afs: Fix permit refcounting Fix four refcount bugs in afs_cache_permit(): (1) When checking the result of the kzalloc(), we can't just return, but must put 'permits'. (2) We shouldn't put permits immediately after hashing a new permit as we need to keep the pointer stable so that we can check to see if vnode->permit_cache has changed before we decide whether to assign to it. (3) 'permits' is being put twice. (4) We need to put either the replacement or the thing replaced after the assignment to vnode->permit_cache. Without this, lots of the following are seen: Kernel BUG at ffffffffa039857b [verbose debug info unavailable] ------------[ cut here ]------------ Kernel BUG at ffffffffa039858a [verbose debug info unavailable] ------------[ cut here ]------------ The addresses are in the .text..refcount section of the kafs.ko module. Following the relocation records for the __ex_table section shows one to be due to the decrement in afs_put_permits() and the other to be key_get() in afs_cache_permit(). Occasionally, the following is seen: refcount_t overflow at afs_cache_permit+0x57d/0x5c0 [kafs] in cc1[562], uid/euid: 0/0 WARNING: CPU: 0 PID: 562 at kernel/panic.c:657 refcount_error_report+0x9c/0xac ... Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/security.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 2b00097101b37..b88b7d45fdaa0 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -120,7 +120,7 @@ static void afs_hash_permits(struct afs_permits *permits) void afs_cache_permit(struct afs_vnode *vnode, struct key *key, unsigned int cb_break) { - struct afs_permits *permits, *xpermits, *replacement, *new = NULL; + struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL; afs_access_t caller_access = READ_ONCE(vnode->status.caller_access); size_t size = 0; bool changed = false; @@ -204,7 +204,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, new = kzalloc(sizeof(struct afs_permits) + sizeof(struct afs_permit) * size, GFP_NOFS); if (!new) - return; + goto out_put; refcount_set(&new->usage, 1); new->nr_permits = size; @@ -229,8 +229,6 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, afs_hash_permits(new); - afs_put_permits(permits); - /* Now see if the permit list we want is actually already available */ spin_lock(&afs_permits_lock); @@ -262,11 +260,15 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, kfree(new); spin_lock(&vnode->lock); - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) || - permits != rcu_access_pointer(vnode->permit_cache)) - goto someone_else_changed_it_unlock; - rcu_assign_pointer(vnode->permit_cache, replacement); + zap = rcu_access_pointer(vnode->permit_cache); + if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) && + zap == permits) + rcu_assign_pointer(vnode->permit_cache, replacement); + else + zap = replacement; spin_unlock(&vnode->lock); + afs_put_permits(zap); +out_put: afs_put_permits(permits); return; From f8de483e7440b0d23ce6372b3ef8358841c8827b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:40:43 +0000 Subject: [PATCH 394/406] afs: Properly reset afs_vnode (inode) fields When an AFS inode is allocated by afs_alloc_inode(), the allocated afs_vnode struct isn't necessarily reset from the last time it was used as an inode because the slab constructor is only invoked once when the memory is obtained from the page allocator. This means that information can leak from one inode to the next because we're not calling kmem_cache_zalloc(). Some of the information isn't reset, in particular the permit cache pointer. Bring the clearances up to date. Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/internal.h | 5 ++++- fs/afs/super.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e03910cebdd49..804d1f9056220 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -441,7 +441,10 @@ enum afs_lock_state { }; /* - * AFS inode private data + * AFS inode private data. + * + * Note that afs_alloc_inode() *must* reset anything that could incorrectly + * leak from one inode to another. */ struct afs_vnode { struct inode vfs_inode; /* the VFS's inode record */ diff --git a/fs/afs/super.c b/fs/afs/super.c index d3f97da61bdfc..1037dd41a6221 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -536,7 +536,9 @@ static void afs_kill_super(struct super_block *sb) } /* - * initialise an inode cache slab element prior to any use + * Initialise an inode cache slab element prior to any use. Note that + * afs_alloc_inode() *must* reset anything that could incorrectly leak from one + * inode to another. */ static void afs_i_init_once(void *_vnode) { @@ -568,11 +570,21 @@ static struct inode *afs_alloc_inode(struct super_block *sb) atomic_inc(&afs_count_active_inodes); + /* Reset anything that shouldn't leak from one inode to the next. */ memset(&vnode->fid, 0, sizeof(vnode->fid)); memset(&vnode->status, 0, sizeof(vnode->status)); vnode->volume = NULL; + vnode->lock_key = NULL; + vnode->permit_cache = NULL; + vnode->cb_interest = NULL; +#ifdef CONFIG_AFS_FSCACHE + vnode->cache = NULL; +#endif + vnode->flags = 1 << AFS_VNODE_UNSET; + vnode->cb_type = 0; + vnode->lock_state = AFS_VNODE_LOCK_NONE; _leave(" = %p", &vnode->vfs_inode); return &vnode->vfs_inode; From 7e8b9c1d2e2f5f45db7d40b50d14f606097c25de Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Nov 2017 17:41:29 +0000 Subject: [PATCH 395/406] arm64: module-plts: factor out PLT generation code for ftrace To allow the ftrace trampoline code to reuse the PLT entry routines, factor it out and move it into asm/module.h. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.h | 44 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/module-plts.c | 38 ++-------------------------- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 19bd97671bb8d..11d4aaee82e11 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -45,4 +45,48 @@ extern u64 module_alloc_base; #define module_alloc_base ((u64)_etext - MODULES_VSIZE) #endif +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... */ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +static inline struct plt_entry get_plt_entry(u64 val) +{ + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + return (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; +} + +static inline bool plt_entries_equal(const struct plt_entry *a, + const struct plt_entry *b) +{ + return a->mov0 == b->mov0 && + a->mov1 == b->mov1 && + a->mov2 == b->mov2; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index d05dbe658409b..ebff6c155cac2 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -11,21 +11,6 @@ #include #include -struct plt_entry { - /* - * A program that conforms to the AArch64 Procedure Call Standard - * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or - * IP1 (x17) may be inserted at any branch instruction that is - * exposed to a relocation that supports long branches. Since that - * is exactly what we are dealing with here, we are free to use x16 - * as a scratch register in the PLT veneers. - */ - __le32 mov0; /* movn x16, #0x.... */ - __le32 mov1; /* movk x16, #0x...., lsl #16 */ - __le32 mov2; /* movk x16, #0x...., lsl #32 */ - __le32 br; /* br x16 */ -}; - static bool in_init(const struct module *mod, void *loc) { return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; @@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; - /* - * MOVK/MOVN/MOVZ opcode: - * +--------+------------+--------+-----------+-------------+---------+ - * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | - * +--------+------------+--------+-----------+-------------+---------+ - * - * Rd := 0x10 (x16) - * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) - * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) - * sf := 1 (64-bit variant) - */ - plt[i] = (struct plt_entry){ - cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), - cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), - cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), - cpu_to_le32(0xd61f0200) - }; + plt[i] = get_plt_entry(val); /* * Check if the entry we just created is a duplicate. Given that the * relocations are sorted, this will be the last entry we allocated. * (if one exists). */ - if (i > 0 && - plt[i].mov0 == plt[i - 1].mov0 && - plt[i].mov1 == plt[i - 1].mov1 && - plt[i].mov2 == plt[i - 1].mov2) + if (i > 0 && plt_entries_equal(plt + i, plt + i - 1)) return (u64)&plt[i - 1]; pltsec->plt_num_entries++; From be0f272bfc83797f70d44faca86954df62e2bbc0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Nov 2017 17:41:30 +0000 Subject: [PATCH 396/406] arm64: ftrace: emit ftrace-mod.o contents through code When building the arm64 kernel with both CONFIG_ARM64_MODULE_PLTS and CONFIG_DYNAMIC_FTRACE enabled, the ftrace-mod.o object file is built with the kernel and contains a trampoline that is linked into each module, so that modules can be loaded far away from the kernel and still reach the ftrace entry point in the core kernel with an ordinary relative branch, as is emitted by the compiler instrumentation code dynamic ftrace relies on. In order to be able to build out of tree modules, this object file needs to be included into the linux-headers or linux-devel packages, which is undesirable, as it makes arm64 a special case (although a precedent does exist for 32-bit PPC). Given that the trampoline essentially consists of a PLT entry, let's not bother with a source or object file for it, and simply patch it in whenever the trampoline is being populated, using the existing PLT support routines. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Makefile | 3 --- arch/arm64/include/asm/module.h | 2 +- arch/arm64/kernel/Makefile | 3 --- arch/arm64/kernel/ftrace-mod.S | 18 ------------------ arch/arm64/kernel/ftrace.c | 14 ++++++++------ arch/arm64/kernel/module-plts.c | 12 ++++++++++++ arch/arm64/kernel/module.lds | 1 + 7 files changed, 22 insertions(+), 31 deletions(-) delete mode 100644 arch/arm64/kernel/ftrace-mod.S diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b35788c909f1c..b481b4a7c0111 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -83,9 +83,6 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds -ifeq ($(CONFIG_DYNAMIC_FTRACE),y) -KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o -endif endif # Default value diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 11d4aaee82e11..4f766178fa6ff 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -32,7 +32,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - void *ftrace_trampoline; + struct plt_entry *ftrace_trampoline; }; #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8265dd7908952..067baace74a09 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif - -# will be included by each individual module but not by the core kernel itself -extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S deleted file mode 100644 index 00c4025be4ff8..0000000000000 --- a/arch/arm64/kernel/ftrace-mod.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2017 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - - .section ".text.ftrace_trampoline", "ax" - .align 3 -0: .quad 0 -__ftrace_trampoline: - ldr x16, 0b - br x16 -ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5ba..50986e388d2b2 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - unsigned long *trampoline; + struct plt_entry trampoline; struct module *mod; /* @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. */ - trampoline = (unsigned long *)mod->arch.ftrace_trampoline; - if (trampoline[0] != addr) { - if (trampoline[0] != 0) { + trampoline = get_plt_entry(addr); + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &trampoline)) { + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &(struct plt_entry){})) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } /* point the trampoline to our ftrace entry point */ module_disable_ro(mod); - trampoline[0] = addr; + *mod->arch.ftrace_trampoline = trampoline; module_enable_ro(mod, true); /* update trampoline before patching in the branch */ smp_wmb(); } - addr = (unsigned long)&trampoline[1]; + addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index ebff6c155cac2..ea640f92fe5ad 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -120,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; + Elf_Shdr *tramp = NULL; int i; /* @@ -131,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt = sechdrs + i; + else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(secstrings + sechdrs[i].sh_name, + ".text.ftrace_trampoline")) + tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } @@ -181,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt_num_entries = 0; mod->arch.init.plt_max_entries = init_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48b..22e36a21c1134 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,4 +1,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .init.plt (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } } From a349b30250634da20950eb91e2551dcd81f1805d Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Wed, 22 Nov 2017 21:43:59 +0900 Subject: [PATCH 397/406] arm64: pgd: Mark pgd_cache as __ro_after_init pgd_cache is setup once while init stage and never changed after that, so it is good candidate for __ro_after_init Signed-off-by: Jinbum Park Signed-off-by: Will Deacon --- arch/arm64/mm/pgd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 371c5f03a1708..051e71ec3335e 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,7 +26,7 @@ #include #include -static struct kmem_cache *pgd_cache; +static struct kmem_cache *pgd_cache __ro_after_init; pgd_t *pgd_alloc(struct mm_struct *mm) { From 9de52a755cfb6da5ee21a07e3a868bdc8fbfccb3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 30 Nov 2017 11:56:37 +0000 Subject: [PATCH 398/406] arm64: fpsimd: Fix failure to restore FPSIMD state after signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fpsimd_update_current_state() function is responsible for loading the FPSIMD state from the user signal frame into the current task during sigreturn. When implementing support for SVE, conditional code was added to this function in order to handle the case where SVE state need to be loaded for the task and merged with the FPSIMD data from the signal frame; however, the FPSIMD-only case was unintentionally dropped. As a result of this, sigreturn does not currently restore the FPSIMD state of the task, except in the case where the system supports SVE and the signal frame contains SVE state in addition to FPSIMD state. This patch fixes this bug by making the copy-in of the FPSIMD data from the signal frame to thread_struct unconditional. This remains a performance regression from v4.14, since the FPSIMD state is now copied into thread_struct and then loaded back, instead of _only_ being loaded into the CPU FPSIMD registers. However, it is essential to call task_fpsimd_load() here anyway in order to ensure that the SVE enable bit in CPACR_EL1 is set correctly before returning to userspace. This could use some refactoring, but since sigreturn is not a fast path I have kept this patch as a pure fix and left the refactoring for later. Cc: Catalin Marinas Cc: Ard Biesheuvel Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support") Reported-by: Alex Bennée Tested-by: Alex Bennée Reviewed-by: Alex Bennée Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 143b3e72c25e6..5084e699447a4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state) local_bh_disable(); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { - current->thread.fpsimd_state = *state; + current->thread.fpsimd_state = *state; + if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); - } + task_fpsimd_load(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { From f8ada189550984ee21f27be736042b74a7da1d68 Mon Sep 17 00:00:00 2001 From: Xu YiPing Date: Wed, 15 Nov 2017 15:39:26 +0800 Subject: [PATCH 399/406] arm64: perf: remove unsupported events for Cortex-A73 bus access read/write events are not supported in A73, based on the Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460). Fixes: 5561b6c5e981 "arm64: perf: add support for Cortex-A73" Acked-by: Julien Thierry Signed-off-by: Xu YiPing Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff8..3affca3dd96a3 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] From 770ba06084f7aeadea120922c775d574f3128ba3 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 29 Nov 2017 17:03:03 +0300 Subject: [PATCH 400/406] arm64: cpu_ops: Add missing 'const' qualifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the kernel with an LTO-enabled GCC spits out the following "const" warning for the cpu_ops code: mm/percpu.c:2168:20: error: pcpu_fc_names causes a section type conflict with dt_supported_cpu_ops const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = { ^ arch/arm64/kernel/cpu_ops.c:34:37: note: ‘dt_supported_cpu_ops’ was declared here static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { Fix it by adding missed const qualifiers. Signed-off-by: Yury Norov Reviewed-by: Nick Desaulniers Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d16978213c5b3..ea001241bdd47 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; -static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = { #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL &acpi_parking_protocol_ops, #endif @@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops; + const struct cpu_operations *const *ops; ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; From 3a33c7605750fb6a87613044d16b1455e482414d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Nov 2017 18:25:17 +0000 Subject: [PATCH 401/406] arm64: context: Fix comments and remove pointless smp_wmb() The comments in the ASID allocator incorrectly hint at an MP-style idiom using the asid_generation and the active_asids array. In fact, the synchronisation is achieved using a combination of an xchg operation and a spinlock, so update the comments and remove the pointless smp_wmb(). Cc: James Morse Signed-off-by: Will Deacon --- arch/arm64/mm/context.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 28a45a19aae7f..6f4017046323f 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu) set_reserved_asid_bits(); - /* - * Ensure the generation bump is observed before we xchg the - * active_asids. - */ - smp_wmb(); - for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* @@ -205,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) asid = atomic64_read(&mm->context.id); /* - * The memory ordering here is subtle. We rely on the control - * dependency between the generation read and the update of - * active_asids to ensure that we are synchronised with a - * parallel rollover (i.e. this pairs with the smp_wmb() in - * flush_context). + * The memory ordering here is subtle. + * If our ASID matches the current generation, then we update + * our active_asids entry with a relaxed xchg. Racing with a + * concurrent rollover means that either: + * + * - We get a zero back from the xchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated generation. + * + * - We get a valid ASID back from the xchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. */ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) From 4db2b604c05afc3d2678fe01d3136c015df313ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Nov 2017 11:47:28 +0100 Subject: [PATCH 402/406] move libgcc.h to include/linux Introducing a new include/lib directory just for this file totally messes up tab completion for include/linux, which is highly annoying. Move it to include/linux where we have headers for all kinds of other lib/ code as well. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- include/{lib => linux}/libgcc.h | 0 lib/ashldi3.c | 2 +- lib/ashrdi3.c | 2 +- lib/cmpdi2.c | 2 +- lib/lshrdi3.c | 2 +- lib/muldi3.c | 2 +- lib/ucmpdi2.c | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename include/{lib => linux}/libgcc.h (100%) diff --git a/include/lib/libgcc.h b/include/linux/libgcc.h similarity index 100% rename from include/lib/libgcc.h rename to include/linux/libgcc.h diff --git a/lib/ashldi3.c b/lib/ashldi3.c index 1b6087db95a54..3ffc46e3bb6c8 100644 --- a/lib/ashldi3.c +++ b/lib/ashldi3.c @@ -16,7 +16,7 @@ #include -#include +#include long long notrace __ashldi3(long long u, word_type b) { diff --git a/lib/ashrdi3.c b/lib/ashrdi3.c index 2e67c97ac65a9..ea054550f0e80 100644 --- a/lib/ashrdi3.c +++ b/lib/ashrdi3.c @@ -16,7 +16,7 @@ #include -#include +#include long long notrace __ashrdi3(long long u, word_type b) { diff --git a/lib/cmpdi2.c b/lib/cmpdi2.c index 6d7ebf6c2b862..2250da7e503eb 100644 --- a/lib/cmpdi2.c +++ b/lib/cmpdi2.c @@ -16,7 +16,7 @@ #include -#include +#include word_type notrace __cmpdi2(long long a, long long b) { diff --git a/lib/lshrdi3.c b/lib/lshrdi3.c index 8e845f4bb65f4..99cfa5721f2d2 100644 --- a/lib/lshrdi3.c +++ b/lib/lshrdi3.c @@ -17,7 +17,7 @@ */ #include -#include +#include long long notrace __lshrdi3(long long u, word_type b) { diff --git a/lib/muldi3.c b/lib/muldi3.c index 88938543e10a6..54c8b3123376b 100644 --- a/lib/muldi3.c +++ b/lib/muldi3.c @@ -15,7 +15,7 @@ */ #include -#include +#include #define W_TYPE_SIZE 32 diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c index 49a53505c8e35..25ca2d4c1e191 100644 --- a/lib/ucmpdi2.c +++ b/lib/ucmpdi2.c @@ -15,7 +15,7 @@ */ #include -#include +#include word_type __ucmpdi2(unsigned long long a, unsigned long long b) { From da894ff100be9044c490f47f61541481b4f42b1f Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:15:32 -0800 Subject: [PATCH 403/406] RISC-V: __io_writes should respect the length argument Whoops -- I must have just been being an idiot again. Thanks to Segher for finding the bug :). CC: Segher Boessenkool Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c1f32cfcc79bb..507ee6a16e679 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -250,7 +250,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) const ctype *buf = buffer; \ \ do { \ - __raw_writeq(*buf++, addr); \ + __raw_write ## len(*buf++, addr); \ } while (--count); \ } \ afence; \ From ae64f9bd1d3621b5e60d7363bc20afb46aede215 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Dec 2017 11:01:47 -0500 Subject: [PATCH 404/406] Linux 4.15-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f761bf475ba52..c988e46a53cd7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* From 8f6d6016898615012ec92957fba65a23b7ae16c9 Mon Sep 17 00:00:00 2001 From: Adrian Fiergolski Date: Mon, 25 Dec 2017 22:26:46 +0100 Subject: [PATCH 405/406] i2c: mux: pca954x: add support for NXP PCA984x family This patch extends the current i2c-mux-pca954x driver and adds support for a newer PCA984x family of the I2C switches and multiplexers from NXP. Signed-off-by: Adrian Fiergolski Reviewed-by: Rob Herring Signed-off-by: Peter Rosin --- .../bindings/i2c/i2c-mux-pca954x.txt | 13 ++++++- drivers/i2c/muxes/Kconfig | 6 +-- drivers/i2c/muxes/i2c-mux-pca954x.c | 38 +++++++++++++++++-- 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt index aa097045a10ec..34d91501342e1 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt @@ -1,10 +1,19 @@ * NXP PCA954x I2C bus switch +The driver supports NXP PCA954x and PCA984x I2C mux/switch devices. + Required Properties: - compatible: Must contain one of the following. - "nxp,pca9540", "nxp,pca9542", "nxp,pca9543", "nxp,pca9544", - "nxp,pca9545", "nxp,pca9546", "nxp,pca9547", "nxp,pca9548" + "nxp,pca9540", + "nxp,pca9542", + "nxp,pca9543", + "nxp,pca9544", + "nxp,pca9545", + "nxp,pca9546", "nxp,pca9846", + "nxp,pca9547", "nxp,pca9847", + "nxp,pca9548", "nxp,pca9848", + "nxp,pca9849" - reg: The I2C address of the device. diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index 0f5c8fc36625b..52a4a922e7e62 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -64,11 +64,11 @@ config I2C_MUX_PCA9541 will be called i2c-mux-pca9541. config I2C_MUX_PCA954x - tristate "Philips PCA954x I2C Mux/switches" + tristate "NXP PCA954x and PCA984x I2C Mux/switches" depends on GPIOLIB || COMPILE_TEST help - If you say yes here you get support for the Philips PCA954x - I2C mux/switch devices. + If you say yes here you get support for the NXP PCA954x + and PCA984x I2C mux/switch devices. This driver can also be built as a module. If so, the module will be called i2c-mux-pca954x. diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 2ca068d8b92d2..fbb84c7ef2829 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -4,11 +4,11 @@ * Copyright (c) 2008-2009 Rodolfo Giometti * Copyright (c) 2008-2009 Eurotech S.p.A. * - * This module supports the PCA954x series of I2C multiplexer/switch chips - * made by Philips Semiconductors. + * This module supports the PCA954x and PCA954x series of I2C multiplexer/switch + * chips made by NXP Semiconductors. * This includes the: - * PCA9540, PCA9542, PCA9543, PCA9544, PCA9545, PCA9546, PCA9547 - * and PCA9548. + * PCA9540, PCA9542, PCA9543, PCA9544, PCA9545, PCA9546, PCA9547, + * PCA9548, PCA9846, PCA9847, PCA9848 and PCA9849. * * These chips are all controlled via the I2C bus itself, and all have a * single 8-bit register. The upstream "parent" bus fans out to two, @@ -63,6 +63,10 @@ enum pca_type { pca_9546, pca_9547, pca_9548, + pca_9846, + pca_9847, + pca_9848, + pca_9849, }; struct chip_desc { @@ -129,6 +133,24 @@ static const struct chip_desc chips[] = { .nchans = 8, .muxtype = pca954x_isswi, }, + [pca_9846] = { + .nchans = 4, + .muxtype = pca954x_isswi, + }, + [pca_9847] = { + .nchans = 8, + .enable = 0x8, + .muxtype = pca954x_ismux, + }, + [pca_9848] = { + .nchans = 8, + .muxtype = pca954x_isswi, + }, + [pca_9849] = { + .nchans = 4, + .enable = 0x4, + .muxtype = pca954x_ismux, + }, }; static const struct i2c_device_id pca954x_id[] = { @@ -140,6 +162,10 @@ static const struct i2c_device_id pca954x_id[] = { { "pca9546", pca_9546 }, { "pca9547", pca_9547 }, { "pca9548", pca_9548 }, + { "pca9846", pca_9846 }, + { "pca9847", pca_9847 }, + { "pca9848", pca_9848 }, + { "pca9849", pca_9849 }, { } }; MODULE_DEVICE_TABLE(i2c, pca954x_id); @@ -154,6 +180,10 @@ static const struct of_device_id pca954x_of_match[] = { { .compatible = "nxp,pca9546", .data = &chips[pca_9546] }, { .compatible = "nxp,pca9547", .data = &chips[pca_9547] }, { .compatible = "nxp,pca9548", .data = &chips[pca_9548] }, + { .compatible = "nxp,pca9846", .data = &chips[pca_9846] }, + { .compatible = "nxp,pca9847", .data = &chips[pca_9847] }, + { .compatible = "nxp,pca9848", .data = &chips[pca_9848] }, + { .compatible = "nxp,pca9849", .data = &chips[pca_9849] }, {} }; MODULE_DEVICE_TABLE(of, pca954x_of_match); From ac5b85de17cb96445c51bd1a1c53c3f675582f26 Mon Sep 17 00:00:00 2001 From: Tomasz Bachorski Date: Mon, 18 Dec 2017 17:18:39 +0100 Subject: [PATCH 406/406] i2c: mux: reg: don't log an error for probe deferral It's possible that i2c_mux_reg_probe_dt() could return -EPROBE_DEFER. In that case, driver will request a probe deferral and an error suggesting device tree parsing problem will be reported. This is a pretty confusing information. Let's change the error handling, so driver will be able to request probe deferral without logging not related errors. Signed-off-by: Tomasz Bachorski Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-reg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index f6c9c3dc6cad9..c948e5a4cb045 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -177,6 +177,9 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) sizeof(mux->data)); } else { ret = i2c_mux_reg_probe_dt(mux, pdev); + if (ret == -EPROBE_DEFER) + return ret; + if (ret < 0) { dev_err(&pdev->dev, "Error parsing device tree"); return ret;