From da25f490a0b3d9c234d659b21024d61fe2d08c8e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 26 Oct 2017 11:43:34 +0300 Subject: microblaze: add missing include to mmu_context_mm.h mmu_context_mm.h is using struct task_struct, which is defined in linux/sched.h. Source files that include mm_context_mm.h (directly or indirectly) and doesn't include linux/sched.h will generate an error. An example of that is drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c This patch adds an include of linux/sched.h to mmu_context_mm.h to avoid such errors. Signed-off-by: Oded Gabbay Signed-off-by: Michal Simek --- arch/microblaze/include/asm/mmu_context_mm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h index 99472d2ca340..97559fe0b953 100644 --- a/arch/microblaze/include/asm/mmu_context_mm.h +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -13,6 +13,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From 253696ccd613fbdaa5aba1de44c461a058e0a114 Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 10 Nov 2017 02:05:06 +0100 Subject: drm/vc4: Account for interrupts in flight Synchronously disable the IRQ to make the following cancel_work_sync invocation effective. An interrupt in flight could enqueue further overflow mem work. As we free the binner BO immediately following vc4_irq_uninstall this caused a NULL pointer dereference in the work callback vc4_overflow_mem_work. Link: https://github.com/anholt/linux/issues/114 Signed-off-by: Stefan Schake Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1510275907-993-2-git-send-email-stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 7d7af3a93d94..61b2e5377993 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,6 +208,9 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + /* Undo the effects of a previous vc4_irq_uninstall. */ + enable_irq(dev->irq); + /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); @@ -225,6 +228,9 @@ vc4_irq_uninstall(struct drm_device *dev) /* Clear any pending interrupts we might have left. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + /* Finish any interrupt handler still in flight. */ + disable_irq(dev->irq); + cancel_work_sync(&vc4->overflow_mem_work); } -- cgit v1.2.3 From f82b735936ffd58b6711cf1f1054616517d8ffcd Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 23 Oct 2017 23:18:16 -0600 Subject: Btrfs: add write_flags for compression bio Compression code path has only flaged bios with REQ_OP_WRITE no matter where the bios come from, but it could be a sync write if fsync starts this writeback or a normal writeback write if wb kthread starts a periodic writeback. It breaks the rule that sync writes and writeback writes need to be differentiated from each other, because from the POV of block layer, all bios need to be recognized by these flags in order to do some management, e.g. throttlling. This passes writeback_control to compression write path so that it can send bios with proper flags to block layer. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 7 ++++--- fs/btrfs/compression.h | 3 ++- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 3 ++- fs/btrfs/inode.c | 15 +++++++++++---- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b35ce16b3df3..4a78e5726337 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -295,7 +295,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, - unsigned long nr_pages) + unsigned long nr_pages, + unsigned int write_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct bio *bio = NULL; @@ -327,7 +328,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bdev = fs_info->fs_devices->latest_bdev; bio = btrfs_bio_alloc(bdev, first_byte); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; refcount_set(&cb->pending_bios, 1); @@ -374,7 +375,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_put(bio); bio = btrfs_bio_alloc(bdev, first_byte); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; bio_add_page(bio, page, PAGE_SIZE, 0); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index da20755ebf21..93c5b82ae97e 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -91,7 +91,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, - unsigned long nr_pages); + unsigned long nr_pages, + unsigned int write_flags); blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dd941885b9c3..1dfd14678db8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3252,7 +3252,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, delalloc_start, delalloc_end, &page_started, - nr_written); + nr_written, wbc); /* File system has been set read-only */ if (ret) { SetPageError(page); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 861dacb371c7..d8b27af7101e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -115,7 +115,8 @@ struct extent_io_ops { */ int (*fill_delalloc)(void *private_data, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written); + unsigned long *nr_written, + struct writeback_control *wbc); int (*writepage_start_hook)(struct page *page, u64 start, u64 end); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b93fe05a39c7..8525a44a607e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -378,6 +378,7 @@ struct async_cow { struct page *locked_page; u64 start; u64 end; + unsigned int write_flags; struct list_head extents; struct btrfs_work work; }; @@ -857,7 +858,8 @@ retry: async_extent->ram_size, ins.objectid, ins.offset, async_extent->pages, - async_extent->nr_pages)) { + async_extent->nr_pages, + async_cow->write_flags)) { struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; @@ -1191,7 +1193,8 @@ static noinline void async_cow_free(struct btrfs_work *work) static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written) + unsigned long *nr_written, + unsigned int write_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct async_cow *async_cow; @@ -1208,6 +1211,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->root = root; async_cow->locked_page = locked_page; async_cow->start = start; + async_cow->write_flags = write_flags; if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && !btrfs_test_opt(fs_info, FORCE_COMPRESS)) @@ -1577,11 +1581,13 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end) */ static int run_delalloc_range(void *private_data, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written) + unsigned long *nr_written, + struct writeback_control *wbc) { struct inode *inode = private_data; int ret; int force_cow = need_force_cow(inode, start, end); + unsigned int write_flags = wbc_to_write_flags(wbc); if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, @@ -1596,7 +1602,8 @@ static int run_delalloc_range(void *private_data, struct page *locked_page, set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags); ret = cow_file_range_async(inode, locked_page, start, end, - page_started, nr_written); + page_started, nr_written, + write_flags); } if (ret) btrfs_cleanup_ordered_extents(inode, start, end - start + 1); -- cgit v1.2.3 From 5e9f2ad5b2904a7e81df6d9a3dbef29478952eac Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 23 Oct 2017 09:58:46 +0300 Subject: btrfs: Fix transaction abort during failure in btrfs_rm_dev_item btrfs_rm_dev_item calls several function under an active transaction, however it fails to abort it if an error happens. Fix this by adding explicit btrfs_abort_transaction/btrfs_end_transaction calls. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f1ecb938ba4d..84d37b4ab230 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1750,20 +1750,24 @@ static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info, key.offset = device->devid; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; - - if (ret > 0) { - ret = -ENOENT; + if (ret) { + if (ret > 0) + ret = -ENOENT; + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); goto out; } ret = btrfs_del_item(trans, root, path); - if (ret) - goto out; + if (ret) { + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + } + out: btrfs_free_path(path); - btrfs_commit_transaction(trans); + if (!ret) + ret = btrfs_commit_transaction(trans); return ret; } -- cgit v1.2.3 From 3065ae5b85654e9141b134bc7b07eb0f2ca7cfcf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Oct 2017 18:36:08 +0100 Subject: btrfs: add missing device::flush_bio puts This fixes potential bio leaks, in several error paths. Unfortunatelly the device structure freeing is opencoded in many places and I missed them when introducing the flush_bio. Most of the time, devices get freed through call_rcu(..., free_device), so it at least it's not that easy to hit the leak, but it's still possible through the path that frees stale devices. Fixes: e0ae99941423 ("btrfs: preallocate device flush bio") Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 84d37b4ab230..b5c67dd5e522 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -189,6 +189,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) struct btrfs_device, dev_list); list_del(&device->dev_list); rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); } kfree(fs_devices); @@ -578,6 +579,7 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev) fs_devs->num_devices--; list_del(&dev->dev_list); rcu_string_free(dev->name); + bio_put(dev->flush_bio); kfree(dev); } break; @@ -630,6 +632,7 @@ static noinline int device_list_add(const char *path, name = rcu_string_strdup(path, GFP_NOFS); if (!name) { + bio_put(device->flush_bio); kfree(device); return -ENOMEM; } @@ -742,6 +745,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) name = rcu_string_strdup(orig_dev->name->str, GFP_KERNEL); if (!name) { + bio_put(device->flush_bio); kfree(device); goto error; } @@ -807,6 +811,7 @@ again: list_del_init(&device->dev_list); fs_devices->num_devices--; rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); } @@ -2353,6 +2358,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { + bio_put(device->flush_bio); kfree(device); ret = -ENOMEM; goto error; @@ -2362,6 +2368,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); ret = PTR_ERR(trans); goto error; @@ -2505,6 +2512,7 @@ error_trans: if (trans) btrfs_end_transaction(trans); rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -2571,6 +2579,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { + bio_put(device->flush_bio); kfree(device); ret = -ENOMEM; goto error; @@ -6288,6 +6297,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, ret = find_next_devid(fs_info, &tmp); if (ret) { + bio_put(dev->flush_bio); kfree(dev); return ERR_PTR(ret); } -- cgit v1.2.3 From 619c47f3d4cd7a60576fd15e133a2eee4fcc0c4e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 19 Jun 2017 14:14:22 +0200 Subject: btrfs: dev_alloc_list is not protected by RCU, use normal list_del The dev_alloc_list list could be protected by various mutexes, depending on the context. The list tracks devices that can take part of allocating new chunks, so the closest mutex is chunk_mutex. Adding a new device from inside the ADD_DEV ioctl will need device_list_mutex and registering a new device from the ioctl needs uuid_mutex. All mutexes naturally guarantee exclusivity against the same context. The device ownership can move between the contexts and the exclusivity is guaranteed by other means, eg. during the mount with the uuid_mutex. There's no RCU involved for dev_alloc_list. Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b5c67dd5e522..d48b24e54366 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2002,7 +2002,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, fs_devices = srcdev->fs_devices; list_del_rcu(&srcdev->dev_list); - list_del_rcu(&srcdev->dev_alloc_list); + list_del(&srcdev->dev_alloc_list); fs_devices->num_devices--; if (srcdev->missing) fs_devices->missing_devices--; -- cgit v1.2.3 From 56a0e706fcf870270878d6d72b71092ae42d229c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 30 Oct 2017 11:14:38 -0600 Subject: Btrfs: bail out gracefully rather than BUG_ON If a file's DIR_ITEM key is invalid (due to memory errors) and gets written to disk, a future lookup_path can end up with kernel panic due to BUG_ON(). This gets rid of the BUG_ON(), meanwhile output the corrupted key and return ENOENT if it's invalid. Signed-off-by: Liu Bo Reported-by: Guillaume Bouchard Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8525a44a607e..1131db7a0b28 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5445,6 +5445,14 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, goto out_err; btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); + if (location->type != BTRFS_INODE_ITEM_KEY && + location->type != BTRFS_ROOT_ITEM_KEY) { + btrfs_warn(root->fs_info, +"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", + __func__, name, btrfs_ino(BTRFS_I(dir)), + location->objectid, location->type, location->offset); + goto out_err; + } out: btrfs_free_path(path); return ret; @@ -5761,8 +5769,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return inode; } - BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); - index = srcu_read_lock(&fs_info->subvol_srcu); ret = fixup_tree_root_location(fs_info, dir, dentry, &location, &sub_root); -- cgit v1.2.3 From 9271c0ca573e02a360b636ecd8cb408852f4e9f6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 8 Nov 2017 17:25:04 +0200 Subject: drm/edid: Don't send non-zero YQ in AVI infoframe for HDMI 1.x sinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently some sinks look at the YQ bits even when receiving RGB, and they get somehow confused when they see a non-zero YQ value. So we can't just blindly follow CEA-861-F and set YQ to match the RGB range. Unfortunately there is no good way to tell whether the sink designer claims to have read CEA-861-F. The CEA extension block revision number has generally been stuck at 3 since forever, and even a very recently manufactured sink might be based on an old design so the manufacturing date doesn't seem like something we can use. In lieu of better information let's follow CEA-861-F only for HDMI 2.0 sinks, since HDMI 2.0 is based on CEA-861-F. For HDMI 1.x sinks we'll always set YQ=0. The alternative would of course be to always set YQ=0. And if we ever encounter a HDMI 2.0+ sink with this bug that's what we'll probably have to do. Cc: stable@vger.kernel.org Cc: Jani Nikula Cc: Eric Anholt Cc: Neil Kownacki Reported-by: Neil Kownacki Tested-by: Neil Kownacki Fixes: fcc8a22cc905 ("drm/edid: Set YQ bits in the AVI infoframe according to CEA-861-F") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101639 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171108152504.12596-1-ville.syrjala@linux.intel.com Acked-by: Eric Anholt --- drivers/gpu/drm/drm_edid.c | 12 ++++++++++-- drivers/gpu/drm/i915/intel_hdmi.c | 3 ++- drivers/gpu/drm/vc4/vc4_hdmi.c | 3 ++- include/drm/drm_edid.h | 3 ++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 6bb6337be920..fc7946eb6665 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4809,7 +4809,8 @@ void drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, enum hdmi_quantization_range rgb_quant_range, - bool rgb_quant_range_selectable) + bool rgb_quant_range_selectable, + bool is_hdmi2_sink) { /* * CEA-861: @@ -4833,8 +4834,15 @@ drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, * YQ-field to match the RGB Quantization Range being transmitted * (e.g., when Limited Range RGB, set YQ=0 or when Full Range RGB, * set YQ=1) and the Sink shall ignore the YQ-field." + * + * Unfortunate certain sinks (eg. VIZ Model 67/E261VA) get confused + * by non-zero YQ when receiving RGB. There doesn't seem to be any + * good way to tell which version of CEA-861 the sink supports, so + * we limit non-zero YQ to HDMI 2.0 sinks only as HDMI 2.0 is based + * on on CEA-861-F. */ - if (rgb_quant_range == HDMI_QUANTIZATION_RANGE_LIMITED) + if (!is_hdmi2_sink || + rgb_quant_range == HDMI_QUANTIZATION_RANGE_LIMITED) frame->ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; else diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e8abea7594ec..3fed1d3ecded 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -481,7 +481,8 @@ static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder, crtc_state->limited_color_range ? HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL, - intel_hdmi->rgb_quant_range_selectable); + intel_hdmi->rgb_quant_range_selectable, + is_hdmi2_sink); /* TODO: handle pixel repetition for YCBCR420 outputs */ intel_write_infoframe(encoder, crtc_state, &frame); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 937da8dd65b8..8f71157a2b06 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -433,7 +433,8 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder) vc4_encoder->limited_rgb_range ? HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL, - vc4_encoder->rgb_range_selectable); + vc4_encoder->rgb_range_selectable, + false); vc4_hdmi_write_infoframe(encoder, &frame); } diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 1e1908a6b1d6..a992434ded99 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -360,7 +360,8 @@ void drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, enum hdmi_quantization_range rgb_quant_range, - bool rgb_quant_range_selectable); + bool rgb_quant_range_selectable, + bool is_hdmi2_sink); /** * drm_eld_mnl - Get ELD monitor name length in bytes. -- cgit v1.2.3 From 53d1cd6b125fb9d69303516a1179ebc3b72f797a Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Tue, 7 Nov 2017 15:17:55 +0530 Subject: cpupowerutils: bench - Fix cpu online check cpupower_is_cpu_online was incorrectly checking for 0. This patch fixes this by checking for 1 when the cpu is online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan --- tools/power/cpupower/bench/system.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index c25a74ae51ba..2bb3eef7d5c1 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -61,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpupower_is_cpu_online(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 1) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; -- cgit v1.2.3 From dbdc468f35ee827cab2753caa1c660bdb832243a Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Wed, 15 Nov 2017 14:10:02 +0530 Subject: cpupower : Fix cpupower working when cpu0 is offline cpuidle_monitor used to assume that cpu0 is always online which is not a valid assumption on POWER machines. This patch fixes this by getting the cpu on which the current thread is running, instead of always using cpu0 for monitoring which may not be online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 1b5da0066ebf..5b3205f16217 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -130,15 +130,18 @@ static struct cpuidle_monitor *cpuidle_register(void) { int num; char *tmp; + int this_cpu; + + this_cpu = sched_getcpu(); /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = cpuidle_state_name(0, num); + tmp = cpuidle_state_name(this_cpu, num); if (tmp == NULL) continue; @@ -146,7 +149,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = cpuidle_state_desc(0, num); + tmp = cpuidle_state_desc(this_cpu, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); -- cgit v1.2.3 From f48bf66b662e7acd6a32dbc28c4fa38931f8f0a6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 3 Nov 2017 22:26:44 +0000 Subject: Btrfs: move definition of the function btrfs_find_new_delalloc_bytes Move the definition of the function btrfs_find_new_delalloc_bytes() closer to the function btrfs_dirty_pages(), because in a future commit it will be used exclusively by btrfs_dirty_pages(). This just moves the function's definition, with no functional changes at all. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 82 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f80254d82f40..47e6ebad78c3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -477,6 +477,47 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } +static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, + const u64 start, + const u64 len, + struct extent_state **cached_state) +{ + u64 search_start = start; + const u64 end = start + len - 1; + + while (search_start < end) { + const u64 search_len = end - search_start + 1; + struct extent_map *em; + u64 em_len; + int ret = 0; + + em = btrfs_get_extent(inode, NULL, 0, search_start, + search_len, 0); + if (IS_ERR(em)) + return PTR_ERR(em); + + if (em->block_start != EXTENT_MAP_HOLE) + goto next; + + em_len = em->len; + if (em->start < search_start) + em_len -= search_start - em->start; + if (em_len > search_len) + em_len = search_len; + + ret = set_extent_bit(&inode->io_tree, search_start, + search_start + em_len - 1, + EXTENT_DELALLOC_NEW, + NULL, cached_state, GFP_NOFS); +next: + search_start = extent_map_end(em); + free_extent_map(em); + if (ret) + return ret; + } + return 0; +} + /* * after copy_from_user, pages need to be dirtied and we need to make * sure holes are created between the current EOF and the start of @@ -1404,47 +1445,6 @@ fail: } -static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, - const u64 start, - const u64 len, - struct extent_state **cached_state) -{ - u64 search_start = start; - const u64 end = start + len - 1; - - while (search_start < end) { - const u64 search_len = end - search_start + 1; - struct extent_map *em; - u64 em_len; - int ret = 0; - - em = btrfs_get_extent(inode, NULL, 0, search_start, - search_len, 0); - if (IS_ERR(em)) - return PTR_ERR(em); - - if (em->block_start != EXTENT_MAP_HOLE) - goto next; - - em_len = em->len; - if (em->start < search_start) - em_len -= search_start - em->start; - if (em_len > search_len) - em_len = search_len; - - ret = set_extent_bit(&inode->io_tree, search_start, - search_start + em_len - 1, - EXTENT_DELALLOC_NEW, - NULL, cached_state, GFP_NOFS); -next: - search_start = extent_map_end(em); - free_extent_map(em); - if (ret) - return ret; - } - return 0; -} - /* * This function locks the extent and properly waits for data=ordered extents * to finish before allowing the pages to be modified if need. -- cgit v1.2.3 From e3b8a4858566a6cc25422fbfdfdd760b13b79280 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 4 Nov 2017 00:16:59 +0000 Subject: Btrfs: fix reported number of inode blocks after buffered append writes The patch from commit a7e3b975a0f9 ("Btrfs: fix reported number of inode blocks") introduced a regression where if we do a buffered write starting at position equal to or greater than the file's size and then stat(2) the file before writeback is triggered, the number of used blocks does not change (unless there's a prealloc/unwritten extent). Example: $ xfs_io -f -c "pwrite -S 0xab 0 64K" foobar $ du -h foobar 0 foobar $ sync $ du -h foobar 64K foobar The first version of that patch didn't had this regression and the second version, which was the one committed, was made only to address some performance regression detected by the intel test robots using fs_mark. This fixes the regression by setting the new delaloc bit in the range, and doing it at btrfs_dirty_pages() while setting the regular dealloc bit as well, so that this way we set both bits at once avoiding navigation of the inode's io tree twice. Doing it at btrfs_dirty_pages() is also the most meaninful place, as we should set the new dellaloc bit when if we set the delalloc bit, which happens only if we copied bytes into the pages at __btrfs_buffered_write(). This was making some of LTP's du tests fail, which can be quickly run using a command line like the following: $ ./runltp -q -p -l /ltp.log -f commands -s du -d /mnt Fixes: a7e3b975a0f9 ("Btrfs: fix reported number of inode blocks") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent_io.h | 5 +++-- fs/btrfs/file.c | 43 ++++++++++++++++++++++++---------------- fs/btrfs/inode.c | 9 +++++---- fs/btrfs/relocation.c | 3 ++- fs/btrfs/tests/extent-io-tests.c | 6 +++--- fs/btrfs/tests/inode-tests.c | 12 +++++------ 7 files changed, 46 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f7df5536ab61..72dcbf19f6ce 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3180,6 +3180,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, int nr); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + unsigned int extra_bits, struct extent_state **cached_state, int dedupe); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, struct btrfs_root *new_root, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index d8b27af7101e..2ef824b58e3c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -365,10 +365,11 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state); static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached_state) + u64 end, unsigned int extra_bits, + struct extent_state **cached_state) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_UPTODATE, + EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits, NULL, cached_state, GFP_NOFS); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 47e6ebad78c3..2de7b4f4ca3c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -538,14 +538,34 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, u64 end_of_last_block; u64 end_pos = pos + write_bytes; loff_t isize = i_size_read(inode); + unsigned int extra_bits = 0; start_pos = pos & ~((u64) fs_info->sectorsize - 1); num_bytes = round_up(write_bytes + pos - start_pos, fs_info->sectorsize); end_of_last_block = start_pos + num_bytes - 1; + + if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (start_pos >= isize && + !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { + /* + * There can't be any extents following eof in this case + * so just set the delalloc new bit for the range + * directly. + */ + extra_bits |= EXTENT_DELALLOC_NEW; + } else { + err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode), + start_pos, + num_bytes, cached); + if (err) + return err; + } + } + err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, - cached, 0); + extra_bits, cached, 0); if (err) return err; @@ -1473,10 +1493,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, + round_up(pos + write_bytes - start_pos, fs_info->sectorsize) - 1; - if (start_pos < inode->vfs_inode.i_size || - (inode->flags & BTRFS_INODE_PREALLOC)) { + if (start_pos < inode->vfs_inode.i_size) { struct btrfs_ordered_extent *ordered; - unsigned int clear_bits; lock_extent_bits(&inode->io_tree, start_pos, last_pos, cached_state); @@ -1498,19 +1516,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } if (ordered) btrfs_put_ordered_extent(ordered); - ret = btrfs_find_new_delalloc_bytes(inode, start_pos, - last_pos - start_pos + 1, - cached_state); - clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG; - if (ret) - clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED; - clear_extent_bit(&inode->io_tree, start_pos, - last_pos, clear_bits, - (clear_bits & EXTENT_LOCKED) ? 1 : 0, - 0, cached_state, GFP_NOFS); - if (ret) - return ret; + clear_extent_bit(&inode->io_tree, start_pos, last_pos, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, + 0, 0, cached_state, GFP_NOFS); *lockstart = start_pos; *lockend = last_pos; ret = 1; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1131db7a0b28..993061f83067 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2032,11 +2032,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, } int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + unsigned int extra_bits, struct extent_state **cached_state, int dedupe) { WARN_ON((end & (PAGE_SIZE - 1)) == 0); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, - cached_state); + extra_bits, cached_state); } /* see btrfs_writepage_start_hook for details on why this is required */ @@ -2097,7 +2098,7 @@ again: goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, + btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state, 0); ClearPageChecked(page); set_page_dirty(page); @@ -4797,7 +4798,7 @@ again: EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, block_start, block_end, + ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0, &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, block_start, block_end, @@ -9163,7 +9164,7 @@ again: EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, page_start, end, + ret = btrfs_set_extent_delalloc(inode, page_start, end, 0, &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, page_start, page_end, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4cf2eb67eba6..f0c3f00e97cb 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3268,7 +3268,8 @@ static int relocate_file_extent_cluster(struct inode *inode, nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end, NULL, 0); + btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL, + 0); set_page_dirty(page); unlock_extent(&BTRFS_I(inode)->io_tree, diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index d06b1c931d05..2e7f64a3b22b 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -114,7 +114,7 @@ static int test_find_delalloc(u32 sectorsize) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL); + set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL); start = 0; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -145,7 +145,7 @@ static int test_find_delalloc(u32 sectorsize) test_msg("Couldn't find the locked page\n"); goto out_bits; } - set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL); + set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -200,7 +200,7 @@ static int test_find_delalloc(u32 sectorsize) * * We are re-using our test_start from above since it works out well. */ - set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL); + set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index f797642c013d..30affb60da51 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -968,7 +968,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) btrfs_test_inode_set_ops(inode); /* [BTRFS_MAX_EXTENT_SIZE] */ - ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, + ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); @@ -984,7 +984,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, - NULL, 0); + 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1018,7 +1018,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, - NULL, 0); + 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1036,7 +1036,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, - NULL, 0); + 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1053,7 +1053,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) */ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + sectorsize, - BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1089,7 +1089,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) */ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + sectorsize, - BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; -- cgit v1.2.3 From 51c4b8bba674cfd2260d173602c4dac08e4c3a99 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:11:30 +0200 Subject: KVM: x86: pvclock: Handle first-time write to pvclock-page contains random junk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When guest passes KVM it's pvclock-page GPA via WRMSR to MSR_KVM_SYSTEM_TIME / MSR_KVM_SYSTEM_TIME_NEW, KVM don't initialize pvclock-page to some start-values. It just requests a clock-update which will happen before entering to guest. The clock-update logic will call kvm_setup_pvclock_page() to update the pvclock-page with info. However, kvm_setup_pvclock_page() *wrongly* assumes that the version-field is initialized to an even number. This is wrong because at first-time write, field could be any-value. Fix simply makes sure that if first-time version-field is odd, increment it once more to make it even and only then start standard logic. This follows same logic as done in other pvclock shared-pages (See kvm_write_wall_clock() and record_steal_time()). Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 34c85aa2e2d1..1d492b3660d5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1830,6 +1830,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) */ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + if (guest_hv_clock.version & 1) + ++guest_hv_clock.version; /* first time write, random junk */ + vcpu->hv_clock.version = guest_hv_clock.version + 1; kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, -- cgit v1.2.3 From ac9b305caa0df6f5b75d294e4b86c1027648991e Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 6 Nov 2017 16:15:10 +0200 Subject: KVM: nVMX/nSVM: Don't intercept #UD when running L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running L2, #UD should be intercepted by L1 or just forwarded directly to L2. It should not reach L0 x86 emulator. Therefore, set intercept for #UD only based on L1 exception-bitmap. Also add WARN_ON_ONCE() on L0 #UD intercept handlers to make sure it is never reached while running L2. This improves commit ae1f57670703 ("KVM: nVMX: Do not emulate #UD while in guest mode") by removing an unnecessary exit from L2 to L0 on #UD when L1 doesn't intercept it. In addition, SVM L0 #UD intercept handler doesn't handle correctly the case it is raised from L2. In this case, it should forward the #UD to guest instead of x86 emulator. As done in VMX #UD intercept handler. This commit fixes this issue as-well. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 9 ++++++++- arch/x86/kvm/vmx.c | 9 ++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 59e13a79c2e3..1bf7c09d97e6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; + u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; + /* No need to intercept #UD if L1 doesn't intercept it */ + h_intercept_exceptions = + h->intercept_exceptions & ~(1U << UD_VECTOR); + c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = + h_intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2196,6 +2202,7 @@ static int ud_interception(struct vcpu_svm *svm) { int er; + WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 714a0673ec3c..d319e2666ad5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1887,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1905,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; + else + eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -5915,10 +5917,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - if (is_guest_mode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } + WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); -- cgit v1.2.3 From 61cb57c9ed631c95b54f8e9090c89d18b3695b3c Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:32 +0200 Subject: KVM: x86: Exit to user-mode on #UD intercept when emulator requires MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instruction emulation after trapping a #UD exception can result in an MMIO access, for example when emulating a MOVBE on a processor that doesn't support the instruction. In this case, the #UD vmexit handler must exit to user mode, but there wasn't any code to do so. Add it for both VMX and SVM. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1bf7c09d97e6..eb714f1cdf7e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2204,6 +2204,8 @@ static int ud_interception(struct vcpu_svm *svm) WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d319e2666ad5..65f1f06f6aaa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5919,6 +5919,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_invalid_opcode(intr_info)) { WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; -- cgit v1.2.3 From 1f4dcb3b213235e642088709a1c54964d23365e9 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:33 +0200 Subject: KVM: x86: emulator: Return to user-mode on L1 CPL=0 emulation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On this case, handle_emulation_failure() fills kvm_run with internal-error information which it expects to be delivered to user-mode for further processing. However, the code reports a wrong return-value which makes KVM to never return to user-mode on this scenario. Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to userspace") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d492b3660d5..e5a7c53a19c6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5433,7 +5433,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); -- cgit v1.2.3 From 9b8ae63798cb97e785a667ff27e43fa6220cb734 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:34 +0200 Subject: KVM: x86: Don't re-execute instruction when not passing CR2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of instruction-decode failure or emulation failure, x86_emulate_instruction() will call reexecute_instruction() which will attempt to use the cr2 value passed to x86_emulate_instruction(). However, when x86_emulate_instruction() is called from emulate_instruction(), cr2 is not passed (passed as 0) and therefore it doesn't make sense to execute reexecute_instruction() logic at all. Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bfb99770c34..977de5fb968b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1161,7 +1161,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 65f1f06f6aaa..6e4a0f822766 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6603,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; -- cgit v1.2.3 From 3853be2603191829b442b64dac6ae8ba0c027bf9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:47 -0800 Subject: KVM: X86: Fix operand/address-size during instruction decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pedro reported: During tests that we conducted on KVM, we noticed that executing a "PUSH %ES" instruction under KVM produces different results on both memory and the SP register depending on whether EPT support is enabled. With EPT the SP is reduced by 4 bytes (and the written value is 0-padded) but without EPT support it is only reduced by 2 bytes. The difference can be observed when the CS.DB field is 1 (32-bit) but not when it's 0 (16-bit). The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D also should be respected instead of just default operand/address-size/66H prefix/67H prefix during instruction decoding. This patch fixes it by also adjusting operand/address-size according to CS.D. Reported-by: Pedro Fonseca Tested-by: Pedro Fonseca Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Pedro Fonseca Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8079d141792a..b4a87debbb4b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5000,6 +5000,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -5018,6 +5020,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; -- cgit v1.2.3 From f1b026a3310a441f504640dd3d9765eb533386b8 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:48 -0800 Subject: KVM: nVMX: Validate the IA32_BNDCFGS on nested VM-entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the SDM, if the "load IA32_BNDCFGS" VM-entry controls is 1, the following checks are performed on the field for the IA32_BNDCFGS MSR: - Bits reserved in the IA32_BNDCFGS MSR must be 0. - The linear address in bits 63:12 must be canonical. Reviewed-by: Konrad Rzeszutek Wilk Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6e4a0f822766..707aaa954b3d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10876,6 +10876,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 1; } + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && + (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || + (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + return 1; + return 0; } -- cgit v1.2.3 From 5af4157388adad82c339e3742fb6b67840721347 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:49 -0800 Subject: KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4f350c6dbcb (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly) can result in L1(run kvm-unit-tests/run_tests.sh vmx_controls in L1) null pointer deference and also L0 calltrace when EPT=0 on both L0 and L1. In L1: BUG: unable to handle kernel paging request at ffffffffc015bf8f IP: vmx_vcpu_run+0x202/0x510 [kvm_intel] PGD 146e13067 P4D 146e13067 PUD 146e15067 PMD 3d2686067 PTE 3d4af9161 Oops: 0003 [#1] PREEMPT SMP CPU: 2 PID: 1798 Comm: qemu-system-x86 Not tainted 4.14.0-rc4+ #6 RIP: 0010:vmx_vcpu_run+0x202/0x510 [kvm_intel] Call Trace: WARNING: kernel stack frame pointer at ffffb86f4988bc18 in qemu-system-x86:1798 has bad value 0000000000000002 In L0: -----------[ cut here ]------------ WARNING: CPU: 6 PID: 4460 at /home/kernel/linux/arch/x86/kvm//vmx.c:9845 vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] CPU: 6 PID: 4460 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc7+ #25 RIP: 0010:vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] Call Trace: paging64_page_fault+0x500/0xde0 [kvm] ? paging32_gva_to_gpa_nested+0x120/0x120 [kvm] ? nonpaging_page_fault+0x3b0/0x3b0 [kvm] ? __asan_storeN+0x12/0x20 ? paging64_gva_to_gpa+0xb0/0x120 [kvm] ? paging64_walk_addr_generic+0x11a0/0x11a0 [kvm] ? lock_acquire+0x2c0/0x2c0 ? vmx_read_guest_seg_ar+0x97/0x100 [kvm_intel] ? vmx_get_segment+0x2a6/0x310 [kvm_intel] ? sched_clock+0x1f/0x30 ? check_chain_key+0x137/0x1e0 ? __lock_acquire+0x83c/0x2420 ? kvm_multiple_exception+0xf2/0x220 [kvm] ? debug_check_no_locks_freed+0x240/0x240 ? debug_smp_processor_id+0x17/0x20 ? __lock_is_held+0x9e/0x100 kvm_mmu_page_fault+0x90/0x180 [kvm] kvm_handle_page_fault+0x15c/0x310 [kvm] ? __lock_is_held+0x9e/0x100 handle_exception+0x3c7/0x4d0 [kvm_intel] vmx_handle_exit+0x103/0x1010 [kvm_intel] ? kvm_arch_vcpu_ioctl_run+0x1628/0x2e20 [kvm] The commit avoids to load host state of vmcs12 as vmcs01's guest state since vmcs12 is not modified (except for the VM-instruction error field) if the checking of vmcs control area fails. However, the mmu context is switched to nested mmu in prepare_vmcs02() and it will not be reloaded since load_vmcs12_host_state() is skipped when nested VMLAUNCH/VMRESUME fails. This patch fixes it by reloading mmu context when nested VMLAUNCH/VMRESUME fails. Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 707aaa954b3d..10474d26a000 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11330,6 +11330,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_clear_interrupt_queue(vcpu); } +static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 entry_failure_code; + + nested_ept_uninit_mmu_context(vcpu); + + /* + * Only PDPTE load can fail as the value of cr3 was checked on entry and + * couldn't have changed. + */ + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); + + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; +} + /* * A part of what we need to when the nested L2 guest exits and we want to * run its L1 parent, is to reset L1's guest state to the host state specified @@ -11343,7 +11361,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; - u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -11370,17 +11387,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); vmx_set_cr4(vcpu, vmcs12->host_cr4); - nested_ept_uninit_mmu_context(vcpu); - - /* - * Only PDPTE load can fail as the value of cr3 was checked on entry and - * couldn't have changed. - */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); - - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; + load_vmcs12_mmu_host_state(vcpu, vmcs12); if (enable_vpid) { /* @@ -11610,6 +11617,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, * accordingly. */ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + load_vmcs12_mmu_host_state(vcpu, vmcs12); + /* * The emulated instruction was already skipped in * nested_vmx_run, but the updated RIP was never -- cgit v1.2.3 From 4d772cb85f64c16eca00177089ecb3cd5d292120 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Nov 2017 18:04:05 +0100 Subject: KVM: x86: fix em_fxstor() sleeping while in atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") optimize the stack size, but introduced a guest memory access which might sleep while in atomic. Fix it by introducing, again, a second fxregs_state. Try to avoid large stacks by using noinline. Add some helpful comments. Reported by syzbot: in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109 2 locks held by syzkaller879109/2909: #0: (&vcpu->mutex){+.+.}, at: [] vcpu_load+0x1c/0x70 arch/x86/kvm/../../../virt/kvm/kvm_main.c:154 #1: (&kvm->srcu){....}, at: [] vcpu_enter_guest arch/x86/kvm/x86.c:6983 [inline] #1: (&kvm->srcu){....}, at: [] vcpu_run arch/x86/kvm/x86.c:7061 [inline] #1: (&kvm->srcu){....}, at: [] kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222 CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted 4.13.0-rc4-next-20170811 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014 __might_sleep+0x95/0x190 kernel/sched/core.c:5967 __might_fault+0xab/0x1d0 mm/memory.c:4383 __copy_from_user include/linux/uaccess.h:71 [inline] __kvm_read_guest_page+0x58/0xa0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771 kvm_vcpu_read_guest_page+0x44/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791 kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407 kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466 segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819 em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022 x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471 x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698 kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854 handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400 vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718 vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline] vcpu_run arch/x86/kvm/x86.c:7061 [inline] kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222 kvm_vcpu_ioctl+0x64c/0x1010 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x437fc9 RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000 R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000 Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") Signed-off-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4a87debbb4b..e7d04d0c8008 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) fxstate_size(ctxt)); } +/* + * FXRSTOR might restore XMM registers not provided by the guest. Fill + * in the host registers (via FXSAVE) instead, so they won't be modified. + * (preemption has to stay disabled until FXRSTOR). + * + * Use noinline to keep the stack for other functions called by callers small. + */ +static noinline int fxregs_fixup(struct fxregs_state *fx_state, + const size_t used_size) +{ + struct fxregs_state fx_tmp; + int rc; + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp)); + memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size, + __fxstate_size(16) - used_size); + + return rc; +} + static int em_fxrstor(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; @@ -4024,19 +4044,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + size = fxstate_size(ctxt); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->get_fpu(ctxt); - size = fxstate_size(ctxt); if (size < __fxstate_size(16)) { - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) goto out; } - rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); - if (rc != X86EMUL_CONTINUE) - goto out; - if (fx_state.mxcsr >> 16) { rc = emulate_gp(ctxt, 0); goto out; -- cgit v1.2.3 From fab0aa3b776f0a3af1db1f50e04f1884015f9082 Mon Sep 17 00:00:00 2001 From: Eyal Moscovici Date: Wed, 8 Nov 2017 14:32:08 +0200 Subject: KVM: x86: Allow suppressing prints on RDMSR/WRMSR of unhandled MSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some guests use these unhandled MSRs very frequently. This cause dmesg to be populated with lots of aggregated messages on usage of ignored MSRs. As ignore_msrs=true means that the user is well-aware his guest use ignored MSRs, allow to also disable the prints on their usage. An example of such guest is ESXi which tends to access a lot to MSR 0x34 (MSR_SMI_COUNT) very frequently. In addition, we have observed this to cause unnecessary delays to guest execution. Such an example is ESXi which experience networking delays in it's guests (L2 guests) because of these prints (even when prints are rate-limited). This can easily be reproduced by pinging from one L2 guest to another. Once in a while, a peak in ping RTT will be observed. Removing these unhandled MSR prints solves the issue. Because these prints can help diagnose issues with guests, this commit only suppress them by a module parameter instead of removing them from code entirely. Signed-off-by: Eyal Moscovici Reviewed-by: Liran Alon Reviewed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Konrad Rzeszutek Wilk [Changed suppress_ignore_msrs_prints to report_ignored_msrs - Radim] Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5a7c53a19c6..0c5b141927b8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +static bool __read_mostly report_ignored_msrs = true; +module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); + unsigned int min_timer_period_us = 500; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); @@ -2325,7 +2328,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. */ - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) @@ -2362,8 +2367,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr, data); return 1; } else { - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", - msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, + "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; } } @@ -2581,7 +2588,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->index); return 1; } else { - vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", + msr_info->index); msr_info->data = 0; } break; -- cgit v1.2.3 From 6ea6e84309ca7e0e850b3083e6b09344ee15c290 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 10 Nov 2017 10:49:38 +0100 Subject: KVM: x86: inject exceptions produced by x86_decode_insn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes, a processor might execute an instruction while another processor is updating the page tables for that instruction's code page, but before the TLB shootdown completes. The interesting case happens if the page is in the TLB. In general, the processor will succeed in executing the instruction and nothing bad happens. However, what if the instruction is an MMIO access? If *that* happens, KVM invokes the emulator, and the emulator gets the updated page tables. If the update side had marked the code page as non present, the page table walk then will fail and so will x86_decode_insn. Unfortunately, even though kvm_fetch_guest_virt is correctly returning X86EMUL_PROPAGATE_FAULT, x86_decode_insn's caller treats the failure as a fatal error if the instruction cannot simply be reexecuted (as is the case for MMIO). And this in fact happened sometimes when rebooting Windows 2012r2 guests. Just checking ctxt->have_exception and injecting the exception if true is enough to fix the case. Thanks to Eduardo Habkost for helping in the debugging of this issue. Reported-by: Yanan Fu Cc: Eduardo Habkost Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c5b141927b8..4552427105f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5734,6 +5734,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; + if (ctxt->have_exception && inject_emulated_exception(vcpu)) + return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; return handle_emulation_failure(vcpu); -- cgit v1.2.3 From 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:29 +0200 Subject: KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM uses ioapic_handled_vectors to track vectors that need to notify the IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an interrupt with old configuration is pending or running and ioapic_handled_vectors only remembers the newest configuration; thus EOI from the old interrupt is not delievered to the IOAPIC. A previous commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") addressed this issue by adding pending edge-triggered interrupts to ioapic_handled_vectors, fixing this race for edge-triggered interrupts. The commit explicitly ignored level-triggered interrupts, but this race applies to them as well: 1) IOAPIC sends a level triggered interrupt vector to VCPU0 2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC to route the vector to VCPU1. The reconfiguration rewrites only the upper 32 bits of the IOREDTBLn register. (Causes KVM to update ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) 3) VCPU0 sends EOI for the vector, but it's not delievered to the IOAPIC because the ioapic_handled_vectors doesn't include the vector. 4) New interrupts are not delievered to VCPU1 because remote_irr bit is set forever. Therefore, the correct behavior is to add all pending and running interrupts to ioapic_handled_vectors. This commit introduces a slight performance hit similar to commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") for the rare case that the vector is reused by a non-IOAPIC source on VCPU0. We prefer to keep solution simple and not handle this case just as the original commit does. Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index bdff437acbcb..ae0a7dc318b2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } -- cgit v1.2.3 From da3fe7bdfada217bf02ecd0477fcdb55da50944c Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:30 +0200 Subject: KVM: x86: ioapic: Don't fire level irq when Remote IRR set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid firing a level-triggered interrupt that has the Remote IRR bit set, because that means that some CPU is already processing it. The Remote IRR bit will be cleared after an EOI and the interrupt will refire if the irq line is still asserted. This behavior is aligned with QEMU's IOAPIC implementation that was introduced by commit f99b86b94987 ("x86: ioapic: ignore level irq during processing") in QEMU. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index ae0a7dc318b2..5c9231139243 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -323,7 +323,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) struct kvm_lapic_irq irqe; int ret; - if (entry->fields.mask) + if (entry->fields.mask || + (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG && + entry->fields.remote_irr)) return -1; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " -- cgit v1.2.3 From 7d2253684dd10eb800ee1898ad7904044ae88ed6 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:31 +0200 Subject: KVM: x86: ioapic: Remove redundant check for Remote IRR in ioapic_set_irq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remote IRR for level-triggered interrupts was previously checked in ioapic_set_irq, but since we now have a check in ioapic_service we can remove the redundant check from ioapic_set_irq. This commit doesn't change semantics. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5c9231139243..6df150eaaa78 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, old_irr = ioapic->irr; ioapic->irr |= mask; - if (edge) + if (edge) { ioapic->irr_delivered &= ~mask; - if ((edge && old_irr == ioapic->irr) || - (!edge && entry.fields.remote_irr)) { - ret = 0; - goto out; + if (old_irr == ioapic->irr) { + ret = 0; + goto out; + } } ret = ioapic_service(ioapic, irq, line_status); -- cgit v1.2.3 From a8bfec2930525808c01f038825d1df3904638631 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:32 +0200 Subject: KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for IOAPICs without an EOI register. They simulate the EOI message manually by changing the trigger mode to edge and then back to level, with the entry being masked during this. QEMU implements this feature in commit ed1263c363c9 ("ioapic: clear remote irr bit for edge-triggered interrupts") As a side effect, this commit removes an incorrect behavior where Remote IRR was cleared when the redirection table entry was rewritten. This is not consistent with the manual and also opens an opportunity for a strange behavior when a redirection table entry is modified from an interrupt handler that handles the same entry: The modification will clear the Remote IRR bit even though the interrupt handler is still running. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6df150eaaa78..163d340ee5f8 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -304,8 +304,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); -- cgit v1.2.3 From b200dded0a6974a3b69599832b2203483920ab25 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:33 +0200 Subject: KVM: x86: ioapic: Preserve read-only values in the redirection table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb ("ioapic: keep RO bits for IOAPIC entry"). Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 163d340ee5f8..4e822ad363f3 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -276,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -298,6 +299,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -305,6 +309,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits &= ~0xffffffffULL; e->bits |= (u32) val; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; /* * Some OSes (Linux, Xen) assume that Remote IRR bit will -- cgit v1.2.3 From 917dc6068bc12a2dafffcf0e9d405ddb1b8780cb Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:07:43 +0200 Subject: KVM: nVMX: Fix vmx_check_nested_events() return value in case an event was reinjected to L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmx_check_nested_events() should return -EBUSY only in case there is a pending L1 event which requires a VMExit from L2 to L1 but such a VMExit is currently blocked. Such VMExits are blocked either because nested_run_pending=1 or an event was reinjected to L2. vmx_check_nested_events() should return 0 in case there are no pending L1 events which requires a VMExit from L2 to L1 or if a VMExit from L2 to L1 was done internally. However, upstream commit which introduced blocking in case an event was reinjected to L2 (commit acc9ab601327 ("KVM: nVMX: Fix pending events injection")) contains a bug: It returns -EBUSY even if there are no pending L1 events which requires VMExit from L2 to L1. This commit fix this issue. Fixes: acc9ab601327 ("KVM: nVMX: Fix pending events injection") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10474d26a000..be4724b5d434 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11105,13 +11105,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; - - if (kvm_event_needs_reinjection(vcpu)) - return -EBUSY; + bool block_nested_events = + vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); if (vcpu->arch.exception.pending && nested_vmx_check_exception(vcpu, &exit_qual)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); vcpu->arch.exception.pending = false; @@ -11120,14 +11119,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); return 0; } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | @@ -11143,7 +11142,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && nested_exit_on_intr(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; -- cgit v1.2.3 From 50a671d4d15b859f447fa527191073019b6ce9cb Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Mon, 6 Nov 2017 11:44:23 -0600 Subject: KVM: x86: Fix CPUID function for word 6 (80000001_ECX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function for CPUID 80000001 ECX is set to 0xc0000001. Set it to 0x80000001. Signed-off-by: Janakarajan Natarajan Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Reviewed-by: Borislav Petkov Fixes: d6321d493319 ("KVM: x86: generalize guest_cpuid_has_ helpers") Signed-off-by: Radim Krčmář --- arch/x86/kvm/cpuid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3a6583..c2cea6651279 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, - [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, -- cgit v1.2.3 From c4ad77e0d49b10b412a9fa7f47a3a23177870bc7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Nov 2017 14:23:59 +0100 Subject: KVM: vmx: use X86_CR4_UMIP and X86_FEATURE_UMIP These bits were not defined until now in common code, but they are now that the kernel supports UMIP too. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index be4724b5d434..0d59dbe430c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9801,8 +9801,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */ - cr4_fixed1_update(bit(11), ecx, bit(2)); + cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); #undef cr4_fixed1_update } -- cgit v1.2.3 From 97f07697932e6faf2a708f7aef7cba8e6b0cab96 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Tue, 31 Oct 2017 18:37:54 +0800 Subject: bdi: convert bdi_debug_register to int Convert bdi_debug_register to int and then do error handle for it. Reviewed-by: Jan Kara Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- mm/backing-dev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 74b52dfd5852..b5f940ce0143 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -113,11 +113,23 @@ static const struct file_operations bdi_debug_stats_fops = { .release = single_release, }; -static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) +static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) { + if (!bdi_debug_root) + return -ENOMEM; + bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); + if (!bdi->debug_dir) + return -ENOMEM; + bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, bdi, &bdi_debug_stats_fops); + if (!bdi->debug_stats) { + debugfs_remove(bdi->debug_dir); + return -ENOMEM; + } + + return 0; } static void bdi_debug_unregister(struct backing_dev_info *bdi) @@ -129,9 +141,10 @@ static void bdi_debug_unregister(struct backing_dev_info *bdi) static inline void bdi_debug_init(void) { } -static inline void bdi_debug_register(struct backing_dev_info *bdi, +static inline int bdi_debug_register(struct backing_dev_info *bdi, const char *name) { + return 0; } static inline void bdi_debug_unregister(struct backing_dev_info *bdi) { -- cgit v1.2.3 From a0747a859ef6d3cc5b6cd50eb694499b78dd0025 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Fri, 17 Nov 2017 23:06:04 +0800 Subject: bdi: add error handle for bdi_debug_register In order to make error handle more cleaner we call bdi_debug_register before set state to WB_registered, that we can avoid call bdi_unregister in release_bdi(). Reviewed-by: Jan Kara Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- mm/backing-dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b5f940ce0143..84b2dc76f140 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -882,10 +882,13 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (IS_ERR(dev)) return PTR_ERR(dev); + if (bdi_debug_register(bdi, dev_name(dev))) { + device_destroy(bdi_class, dev->devt); + return -ENOMEM; + } cgwb_bdi_register(bdi); bdi->dev = dev; - bdi_debug_register(bdi, dev_name(dev)); set_bit(WB_registered, &bdi->wb.state); spin_lock_bh(&bdi_lock); -- cgit v1.2.3 From 3a92168bc8a113098b44a95d499557a88bb387da Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Tue, 31 Oct 2017 18:38:59 +0800 Subject: block: add WARN_ON if bdi register fail device_add_disk need do more safety error handle, so this patch just add WARN_ON. Reviewed-by: Jan Kara Signed-off-by: weiping zhang Adapted for current series by me. Signed-off-by: Jens Axboe --- block/genhd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index c2223f12a805..c143a2274238 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -671,10 +671,13 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; disk->flags |= GENHD_FL_NO_PART_SCAN; } else { + int ret; + /* Register BDI before referencing it from bdev */ disk_to_dev(disk)->devt = devt; - bdi_register_owner(disk->queue->backing_dev_info, - disk_to_dev(disk)); + ret = bdi_register_owner(disk->queue->backing_dev_info, + disk_to_dev(disk)); + WARN_ON(ret); blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } -- cgit v1.2.3 From 7fb526212f16fcec4e92121ea86dc28fba493177 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Nov 2017 17:43:38 -0800 Subject: block: genhd.c: fix message typo Fix typo in error message. Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/genhd.c b/block/genhd.c index c143a2274238..96a66f671720 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1392,7 +1392,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (minors > DISK_MAX_PARTS) { printk(KERN_ERR - "block: can't allocated more than %d partitions\n", + "block: can't allocate more than %d partitions\n", DISK_MAX_PARTS); minors = DISK_MAX_PARTS; } -- cgit v1.2.3 From 1690102de5651bb85b23d5eeaff682a6b96d705b Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Sun, 19 Nov 2017 16:48:13 -0200 Subject: blktrace: Use blk_trace_bio_get_cgid inside blk_add_trace_bio We always pass in blk_trace_bio_get_cgid(q, bio) to blk_add_trace_bio(). Since both are readily available in the function already, kill the argument. Signed-off-by: Marcos Paulo de Souza Rewrote commit message. Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 206e0e2ace53..c5987d4c5f23 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -872,7 +872,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, * **/ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what, int error, union kernfs_node_id *cgid) + u32 what, int error) { struct blk_trace *bt = q->blk_trace; @@ -880,22 +880,21 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, return; __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, what, error, 0, NULL, cgid); + bio_op(bio), bio->bi_opf, what, error, 0, NULL, + blk_trace_bio_get_cgid(q, bio)); } static void blk_add_trace_bio_bounce(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, struct request_queue *q, struct bio *bio, int error) { - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } static void blk_add_trace_bio_backmerge(void *ignore, @@ -903,8 +902,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); } static void blk_add_trace_bio_frontmerge(void *ignore, @@ -912,15 +910,13 @@ static void blk_add_trace_bio_frontmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); } static void blk_add_trace_bio_queue(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); } static void blk_add_trace_getrq(void *ignore, @@ -928,8 +924,7 @@ static void blk_add_trace_getrq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { struct blk_trace *bt = q->blk_trace; @@ -945,8 +940,7 @@ static void blk_add_trace_sleeprq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { struct blk_trace *bt = q->blk_trace; -- cgit v1.2.3 From 48832f8d58cfedb2f9bee11bbfbb657efb42e7e7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:20 +0300 Subject: nvme-fabrics: introduce init command check for a queue that is not alive When the fabrics queue is not alive and fully functional, no commands should be allowed to pass but connect (which moves the queue to a fully functional state). Any other command should be failed, with either temporary status BLK_STS_RESOUCE or permanent status BLK_STS_IOERR. This is shared across all fabrics, hence move the check to fabrics library. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.h | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/rdma.c | 32 ++++++-------------------------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 42232e731f19..9ba614953607 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -156,4 +156,34 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); +static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, + struct request *rq) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + + /* + * We cannot accept any other command until the connect command has + * completed, so only allow connect to pass. + */ + if (!blk_rq_is_passthrough(rq) || + cmd->common.opcode != nvme_fabrics_command || + cmd->fabrics.fctype != nvme_fabrics_type_connect) { + /* + * Reconnecting state means transport disruption, which can take + * a long time and even might fail permanently, fail fast to + * give upper layers a chance to failover. + * Deleting state means that the ctrl will never accept commands + * again, fail it permanently. + */ + if (ctrl->state == NVME_CTRL_RECONNECTING || + ctrl->state == NVME_CTRL_DELETING) { + nvme_req(rq)->status = NVME_SC_ABORT_REQ; + return BLK_STS_IOERR; + } + return BLK_STS_RESOURCE; /* try again later */ + } + + return BLK_STS_OK; +} + #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4f9bf2f815c3..2c597105a6bf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1591,31 +1591,11 @@ nvme_rdma_timeout(struct request *rq, bool reserved) * We cannot accept any other command until the Connect command has completed. */ static inline blk_status_t -nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq) -{ - if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { - struct nvme_command *cmd = nvme_req(rq)->cmd; - - if (!blk_rq_is_passthrough(rq) || - cmd->common.opcode != nvme_fabrics_command || - cmd->fabrics.fctype != nvme_fabrics_type_connect) { - /* - * reconnecting state means transport disruption, which - * can take a long time and even might fail permanently, - * fail fast to give upper layers a chance to failover. - * deleting state means that the ctrl will never accept - * commands again, fail it permanently. - */ - if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING || - queue->ctrl->ctrl.state == NVME_CTRL_DELETING) { - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; - } - return BLK_STS_RESOURCE; /* try again later */ - } - } - - return 0; +nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) +{ + if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -1634,7 +1614,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - ret = nvme_rdma_queue_is_ready(queue, rq); + ret = nvme_rdma_is_ready(queue, rq); if (unlikely(ret)) return ret; -- cgit v1.2.3 From 9e0ed16ab9a9aaf670b81c9cd05b5e50defed654 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:21 +0300 Subject: nvme-fc: check if queue is ready in queue_rq In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_FC_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7ab0be55c7d0..e0577bf33f45 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -32,6 +32,7 @@ enum nvme_fc_queue_flags { NVME_FC_Q_CONNECTED = (1 << 0), + NVME_FC_Q_LIVE = (1 << 1), }; #define NVMEFC_QUEUE_DELAY 3 /* ms units */ @@ -1927,6 +1928,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) return; + clear_bit(NVME_FC_Q_LIVE, &queue->flags); /* * Current implementation never disconnects a single queue. * It always terminates a whole association. So there is never @@ -1934,7 +1936,6 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); } static void @@ -2013,6 +2014,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -2320,6 +2323,14 @@ busy: return BLK_STS_RESOURCE; } +static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) @@ -2335,6 +2346,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, u32 data_len; blk_status_t ret; + ret = nvme_fc_is_ready(queue, rq); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, rq, sqe); if (ret) return ret; @@ -2727,6 +2742,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (ret) goto out_disconnect_admin_queue; + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); + /* * Check controller capabilities * -- cgit v1.2.3 From 9d7fab04b95e8c26014a9bfc1c943b8360b44c17 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:22 +0300 Subject: nvme-loop: check if queue is ready in queue_rq In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_LOOP_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 96d390416789..1e21b286f299 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -52,10 +52,15 @@ static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) return container_of(ctrl, struct nvme_loop_ctrl, ctrl); } +enum nvme_loop_queue_flags { + NVME_LOOP_Q_LIVE = 0, +}; + struct nvme_loop_queue { struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvme_loop_ctrl *ctrl; + unsigned long flags; }; static struct nvmet_port *nvmet_loop_port; @@ -144,6 +149,14 @@ nvme_loop_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } +static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -153,6 +166,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; + ret = nvme_loop_is_ready(queue, req); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, req, &iod->cmd); if (ret) return ret; @@ -267,6 +284,7 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); @@ -297,8 +315,10 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->ctrl.queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + } } static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) @@ -338,6 +358,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) return ret; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); } return 0; @@ -380,6 +401,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, -- cgit v1.2.3 From 8427bbc224863e14d905c87920d4005cb3e88ac3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 9 Nov 2017 01:12:03 -0500 Subject: nvme-pci: disable APST on Samsung SSD 960 EVO + ASUS PRIME B350M-A The NVMe device in question drops off the PCIe bus after system suspend. I've tried several approaches to workaround this issue, but none of them works: - NVME_QUIRK_DELAY_BEFORE_CHK_RDY - NVME_QUIRK_NO_DEEPEST_PS - Disable APST before controller shutdown - Delay between controller shutdown and system suspend - Explicitly set power state to 0 before controller shutdown Fortunately it's a desktop, so disable APST won't hurt the battery. Also, change the quirk function name to reflect it's for vendor combination quirks. BugLink: https://bugs.launchpad.net/bugs/1705748 Signed-off-by: Kai-Heng Feng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a11cfd470089..8a15aa50b8e0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2428,7 +2428,7 @@ static int nvme_dev_map(struct nvme_dev *dev) return -ENODEV; } -static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) +static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) { if (pdev->vendor == 0x144d && pdev->device == 0xa802) { /* @@ -2443,6 +2443,14 @@ static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) return NVME_QUIRK_NO_DEEPEST_PS; + } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { + /* + * Samsung SSD 960 EVO drops off the PCIe bus after system + * suspend on a Ryzen board, ASUS PRIME B350M-A. + */ + if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && + dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) + return NVME_QUIRK_NO_APST; } return 0; @@ -2482,7 +2490,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto unmap; - quirks |= check_dell_samsung_bug(pdev); + quirks |= check_vendor_combination_bug(pdev); result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); -- cgit v1.2.3 From 244a8fe40a09c218622eb9927b9090b0a9b73a1a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 17 Nov 2017 01:34:24 +0900 Subject: nvme-pci: avoid hmb desc array idx out-of-bound when hmmaxd set. hmb descriptor idx out-of-bound occurs in case of below conditions. preferred = 128MiB chunk_size = 4MiB hmmaxd = 1 Current code will not allow rmmod which will free hmb descriptors to be done successfully in above case. "descs[i]" will be set in for-loop without seeing any conditions related to "max_entries" after a single "descs" was allocated by (max_entries = 1) in this case. Added a condition into for-loop to check index of descriptors. Fixes: 044a9df1("nvme-pci: implement the HMB entry number and size limitations") Signed-off-by: Minwoo Im Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8a15aa50b8e0..58dbe684007b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1787,7 +1787,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, if (!bufs) goto out_free_descs; - for (size = 0; size < preferred; size += len) { + for (size = 0; size < preferred && i < max_entries; size += len) { dma_addr_t dma_addr; len = min_t(u64, chunk_size, preferred - size); -- cgit v1.2.3 From 89c4aff6d4f71726f22e567f046dc1dd73c35de1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2017 14:26:27 +0000 Subject: nvme: fix spelling mistake: "requeing" -> "requeuing" Trivial fix to spelling mistake in dev_warn_ratelimited message text Signed-off-by: Colin Ian King Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 78d92151a904..1218a9fca846 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -131,7 +131,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, bio->bi_opf |= REQ_NVME_MPATH; ret = direct_make_request(bio); } else if (!list_empty_careful(&head->list)) { - dev_warn_ratelimited(dev, "no path available - requeing I/O\n"); + dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); spin_lock_irq(&head->requeue_lock); bio_list_add(&head->requeue_list, bio); -- cgit v1.2.3 From b0d61d586f09fd814a45a5d778fe0d6123f67c2a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Nov 2017 13:36:49 -0700 Subject: nvme: Fix NULL dereference on reservation request This fixes using the NULL 'head' before getting the reference. It is however possible the head will always be NULL, so this patch uses the struct nvme_ns to get the ns_id field. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 25da74d310d1..a2ab4e440bea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1449,19 +1449,19 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, int srcu_idx, ret; u8 data[16] = { 0, }; + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + return -EWOULDBLOCK; + put_unaligned_le64(key, &data[0]); put_unaligned_le64(sa_key, &data[8]); memset(&c, 0, sizeof(c)); c.common.opcode = op; - c.common.nsid = cpu_to_le32(head->ns_id); + c.common.nsid = cpu_to_le32(ns->head->ns_id); c.common.cdw10[0] = cpu_to_le32(cdw10); - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); - if (unlikely(!ns)) - ret = -EWOULDBLOCK; - else - ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); + ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); nvme_put_ns_from_disk(head, srcu_idx); return ret; } -- cgit v1.2.3 From 9941a862cc92e448df95709ff40a618964b25e33 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Nov 2017 13:36:50 -0700 Subject: nvme: Suppress static analyis warning The ns->head is always valid, so we don't need to check for NULL. Reported-by: Dan Carpenter Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a2ab4e440bea..f837d666cbd4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2961,8 +2961,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) static void nvme_ns_remove(struct nvme_ns *ns) { - struct nvme_ns_head *head = ns->head; - if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -2980,15 +2978,14 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); nvme_mpath_clear_current_path(ns); - if (head) - list_del_rcu(&ns->siblings); + list_del_rcu(&ns->siblings); mutex_unlock(&ns->ctrl->subsys->lock); mutex_lock(&ns->ctrl->namespaces_mutex); list_del_init(&ns->list); mutex_unlock(&ns->ctrl->namespaces_mutex); - synchronize_srcu(&head->srcu); + synchronize_srcu(&ns->head->srcu); nvme_put_ns(ns); } -- cgit v1.2.3 From 619c62dcc62b957d17cccde2081cad527b020883 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 10 Nov 2017 15:38:45 -0800 Subject: nvmet-fc: correct ref counting error when deferred rcv used Whenever a cmd is received a reference is taken while looking up the queue. The reference is removed after the cmd is done as the iod is returned for reuse. The fod may be reused for a deferred (recevied but no job context) cmd. Existing code removes the reference only if the fod is not reused for another command. Given the fod may be used for one or more ios, although a reference was taken per io, it won't be matched on the frees. Remove the reference on every fod free. This pairs the references to each io. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 664d3013f68f..5fd86039e353 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -533,15 +533,15 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); + /* release the queue lookup reference on the completed IO */ + nvmet_fc_tgt_q_put(queue); + spin_lock_irqsave(&queue->qlock, flags); deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, struct nvmet_fc_defer_fcp_req, req_list); if (!deferfcp) { list_add_tail(&fod->fcp_list, &fod->queue->fod_list); spin_unlock_irqrestore(&queue->qlock, flags); - - /* Release reference taken at queue lookup and fod allocation */ - nvmet_fc_tgt_q_put(queue); return; } @@ -760,6 +760,9 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) tgtport->ops->fcp_req_release(&tgtport->fc_target_port, deferfcp->fcp_req); + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + kfree(deferfcp); spin_lock_irqsave(&queue->qlock, flags); -- cgit v1.2.3 From 38389ec84e835fa31a59b7dabb18343106a6d0d5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 16 Nov 2017 14:26:36 +0100 Subject: s390/topology: fix compile error in file arch/s390/kernel/smp.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1887aa07b676 ("s390/topology: add detection of dedicated vs shared CPUs") introduced following compiler error when CONFIG_SCHED_TOPOLOGY is not set. CC arch/s390/kernel/smp.o ... arch/s390/kernel/smp.c: In function ‘smp_start_secondary’: arch/s390/kernel/smp.c:812:6: error: implicit declaration of function ‘topology_cpu_dedicated’; did you mean ‘topology_cpu_init’? This patch fixes the compiler error by adding function topology_cpu_dedicated() to return false when this config option is not defined. Signed-off-by: Thomas Richter Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/topology.h | 1 + arch/s390/kernel/smp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 1807229b292f..cca406fdbe51 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -53,6 +53,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu); static inline void topology_init_early(void) { } static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline int topology_cpu_dedicated(int cpu_nr) { return 0; } static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cd4334e80b64..b8c1a85bcf2d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "entry.h" enum { -- cgit v1.2.3 From 3241d3eb7d9bb7a134bedf49db196cf98b58834d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Nov 2017 14:54:04 +0100 Subject: s390: rework __switch_to() to allow larger task_struct offsets If GCC_PLUGIN_RANDSTRUCT is enabled the members of task_struct will be shuffled around. The offsets of the "pid" and "stack" members within task_struct may not necessarily fit into 12 bits anymore, which causes compile errors within __switch_to, since instructions are used, which only have a 12 bit displacement field. Therefore rework __switch_to, to allow for larger offsets. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a316cd6999ad..9e5f6cd8e4c2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -180,18 +180,17 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) */ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - lgr %r1,%r2 - aghi %r1,__TASK_thread # thread_struct of prev task - lg %r5,__TASK_stack(%r3) # start of kernel stack of next - stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev - lgr %r1,%r3 - aghi %r1,__TASK_thread # thread_struct of next task + lghi %r4,__TASK_stack + lghi %r1,__TASK_thread + lg %r5,0(%r4,%r3) # start of kernel stack of next + stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lgr %r15,%r5 aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack - lg %r15,__THREAD_ksp(%r1) # load kernel stack of next - mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next + aghi %r3,__TASK_pid + mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP bzr %r14 -- cgit v1.2.3 From de35089cc82c33f8be1dec5a0c3d7a05cf91720d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Nov 2017 09:50:40 +0100 Subject: s390/disassembler: remove confusing code When searching the opcode offset table within find_insn() the check "entry->opcode == 0" was intended to clarify that 1-byte opcodes, the first one being 0, are special. However there is no mnemonic for an illegal opcode starting with 0. Therefore there is also no opcode offset table entry that matches, which again means that the check never is true. Therefore just remove the confusing check, and add a comment which hopefully explains how this works. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 3be829721cf9..465d52b5e470 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -396,9 +396,14 @@ struct s390_insn *find_insn(unsigned char *code) unsigned char opfrag; int i; + /* Search the opcode offset table to find an entry which + * matches the beginning of the opcode. If there is no match + * the last entry will be used, which is the default entry for + * unknown instructions as well as 1-byte opcode instructions. + */ for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { entry = &opcode_offset[i]; - if (entry->opcode == code[0] || entry->opcode == 0) + if (entry->opcode == code[0]) break; } -- cgit v1.2.3 From 0b0882672640ced4deeebf84da0b88b6389619c4 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 17 Nov 2017 16:32:22 +0100 Subject: s390/zcrypt: Fix wrong comparison leading to strange load balancing The function to decide if one zcrypt queue is better than another one compared two pointers instead of comparing the values where the pointers refer to. So within the same zcrypt card when load of each queue was equal just one queue was used. This effect only appears on relatively lite load, typically with one thread applications. This patch fixes the wrong comparison and now the counters show that requests are balanced equally over all available queues within the cards. There is no performance improvement coming with this fix. As long as the queue depth for an APQN queue is not touched, processing is not faster when requests are spread over queues within the same card hardware. So this fix only beautifies the lszcrypt counter printouts. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index b5f4006198b9..a9a56aa9c26b 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -218,8 +218,8 @@ static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq, weight += atomic_read(&zq->load); pref_weight += atomic_read(&pref_zq->load); if (weight == pref_weight) - return &zq->queue->total_request_count > - &pref_zq->queue->total_request_count; + return zq->queue->total_request_count > + pref_zq->queue->total_request_count; return weight > pref_weight; } -- cgit v1.2.3 From 8e138e0d92c6c9d3d481674fb14e3439b495be37 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Nov 2017 14:50:46 -0500 Subject: btrfs: clear space cache inode generation always We discovered a box that had double allocations, and suspected the space cache may be to blame. While auditing the write out path I noticed that if we've already setup the space cache we will just carry on. This means that any error we hit after cache_save_setup before we go to actually write the cache out we won't reset the inode generation, so whatever was already written will be considered correct, except it'll be stale. Fix this by _always_ resetting the generation on the block group inode, this way we only ever have valid or invalid cache. With this patch I was no longer able to reproduce cache corruption with dm-log-writes and my bpf error injection tool. Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 673ac4e01dd0..784d41e95ed9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3502,13 +3502,6 @@ again: goto again; } - /* We've already setup this transaction, go ahead and exit */ - if (block_group->cache_generation == trans->transid && - i_size_read(inode)) { - dcs = BTRFS_DC_SETUP; - goto out_put; - } - /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -3532,6 +3525,13 @@ again: } WARN_ON(ret); + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + if (i_size_read(inode) > 0) { ret = btrfs_check_trunc_cache_free_space(fs_info, &fs_info->global_block_rsv); -- cgit v1.2.3 From 10809bb976648ac58194a629e3d7af99e7400297 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 15 Oct 2017 21:24:49 +0200 Subject: ACPI / bus: Leave modalias empty for devices which are not present Most Bay and Cherry Trail devices use a generic DSDT with all possible peripheral devices present in the DSDT, with their _STA returning 0x00 or 0x0f based on AML variables which describe what is actually present on the board. Since ACPI device objects with a 0x00 status (not present) still get an entry under /sys/bus/acpi/devices, and those entry had an acpi:PNPID modalias, userspace would end up loading modules for non present hardware. This commit fixes this by leaving the modalias empty for non present devices. This results in 10 modules less being loaded with a generic distro kernel config on my Cherry Trail test-device (a GPD pocket). Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 24418932612e..a041689e5701 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -146,6 +146,10 @@ static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, int count; struct acpi_hardware_id *id; + /* Avoid unnecessarily loading modules for non present devices. */ + if (!acpi_device_is_present(acpi_dev)) + return 0; + /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the -- cgit v1.2.3 From a64a62ce9a380213dc9e192f762266d70c9b40ec Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 26 Sep 2017 16:54:09 +0800 Subject: ACPI / EC: Fix regression related to PM ops support in ECDT device On platforms (ASUS X550ZE and possibly all ASUS X series) with valid ECDT EC but invalid DSDT EC, EC PM ops won't be invoked as ECDT EC is not an ACPI device. Thus the following commit actually removed post-resume acpi_ec_enable_event() invocation for such platforms, and triggered a regression on them that after being resumed, EC (actually should be ECDT) driver stops handling EC events: Commit: c2b46d679b30c5c0d7eb47a21085943242bdd8dc Subject: ACPI / EC: Add PM operations to improve event handling for resume process Notice that the root cause actually is "ECDT is not an ACPI device" rather than "the timing of acpi_ec_enable_event() invocation", this patch fixes this issue by enumerating ECDT EC as an ACPI device. Due to the existence of the noirq stage, the ability of tuning the timing of acpi_ec_enable_event() invocation is still meaningful. This patch is a little bit different from the posted fix by moving acpi_config_boot_ec() from acpi_ec_ecdt_start() to acpi_ec_add() to make sure that EC event handling won't be stopped as long as the ACPI EC driver is bound. Thus the following sequence shouldn't disable EC event handling: unbind,suspend,resume,bind. Fixes: c2b46d679b30 (ACPI / EC: Add PM operations to improve event handling for resume process) Link: https://bugzilla.kernel.org/show_bug.cgi?id=196847 Reported-by: Luya Tshimbalanga Tested-by: Luya Tshimbalanga Cc: 4.9+ # 4.9+ Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 69 +++++++++++++++++++++++++++++---------------- drivers/acpi/internal.h | 1 + drivers/acpi/scan.c | 21 ++++++++++++++ include/acpi/acpi_bus.h | 1 + include/acpi/acpi_drivers.h | 1 + 5 files changed, 69 insertions(+), 24 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 82b3ce5e937e..df842465634a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1597,32 +1597,41 @@ static int acpi_ec_add(struct acpi_device *device) { struct acpi_ec *ec = NULL; int ret; + bool is_ecdt = false; + acpi_status status; strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_EC_CLASS); - ec = acpi_ec_alloc(); - if (!ec) - return -ENOMEM; - if (ec_parse_device(device->handle, 0, ec, NULL) != - AE_CTRL_TERMINATE) { + if (!strcmp(acpi_device_hid(device), ACPI_ECDT_HID)) { + is_ecdt = true; + ec = boot_ec; + } else { + ec = acpi_ec_alloc(); + if (!ec) + return -ENOMEM; + status = ec_parse_device(device->handle, 0, ec, NULL); + if (status != AE_CTRL_TERMINATE) { ret = -EINVAL; goto err_alloc; + } } if (acpi_is_boot_ec(ec)) { - boot_ec_is_ecdt = false; - /* - * Trust PNP0C09 namespace location rather than ECDT ID. - * - * But trust ECDT GPE rather than _GPE because of ASUS quirks, - * so do not change boot_ec->gpe to ec->gpe. - */ - boot_ec->handle = ec->handle; - acpi_handle_debug(ec->handle, "duplicated.\n"); - acpi_ec_free(ec); - ec = boot_ec; - ret = acpi_config_boot_ec(ec, ec->handle, true, false); + boot_ec_is_ecdt = is_ecdt; + if (!is_ecdt) { + /* + * Trust PNP0C09 namespace location rather than + * ECDT ID. But trust ECDT GPE rather than _GPE + * because of ASUS quirks, so do not change + * boot_ec->gpe to ec->gpe. + */ + boot_ec->handle = ec->handle; + acpi_handle_debug(ec->handle, "duplicated.\n"); + acpi_ec_free(ec); + ec = boot_ec; + } + ret = acpi_config_boot_ec(ec, ec->handle, true, is_ecdt); } else ret = acpi_ec_setup(ec, true); if (ret) @@ -1635,8 +1644,10 @@ static int acpi_ec_add(struct acpi_device *device) ret = !!request_region(ec->command_addr, 1, "EC cmd"); WARN(!ret, "Could not request EC cmd io port 0x%lx", ec->command_addr); - /* Reprobe devices depending on the EC */ - acpi_walk_dep_device_list(ec->handle); + if (!is_ecdt) { + /* Reprobe devices depending on the EC */ + acpi_walk_dep_device_list(ec->handle); + } acpi_handle_debug(ec->handle, "enumerated.\n"); return 0; @@ -1692,6 +1703,7 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context) static const struct acpi_device_id ec_device_ids[] = { {"PNP0C09", 0}, + {ACPI_ECDT_HID, 0}, {"", 0}, }; @@ -1764,11 +1776,14 @@ static int __init acpi_ec_ecdt_start(void) * Note: ec->handle can be valid if this function is called after * acpi_ec_add(), hence the fast path. */ - if (boot_ec->handle != ACPI_ROOT_OBJECT) - handle = boot_ec->handle; - else if (!acpi_ec_ecdt_get_handle(&handle)) - return -ENODEV; - return acpi_config_boot_ec(boot_ec, handle, true, true); + if (boot_ec->handle == ACPI_ROOT_OBJECT) { + if (!acpi_ec_ecdt_get_handle(&handle)) + return -ENODEV; + boot_ec->handle = handle; + } + + /* Register to ACPI bus with PM ops attached */ + return acpi_bus_register_early_device(ACPI_BUS_TYPE_ECDT_EC); } #if 0 @@ -2020,6 +2035,12 @@ int __init acpi_ec_init(void) /* Drivers must be started after acpi_ec_query_init() */ dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver); + /* + * Register ECDT to ACPI bus only when PNP0C09 probe fails. This is + * useful for platforms (confirmed on ASUS X550ZE) with valid ECDT + * settings but invalid DSDT settings. + * https://bugzilla.kernel.org/show_bug.cgi?id=196847 + */ ecdt_fail = acpi_ec_ecdt_start(); return ecdt_fail && dsdt_fail ? -ENODEV : 0; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4361c4415b4f..ede83d38beed 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -115,6 +115,7 @@ bool acpi_device_is_present(const struct acpi_device *adev); bool acpi_device_is_battery(struct acpi_device *adev); bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev); +int acpi_bus_register_early_device(int type); /* -------------------------------------------------------------------------- Device Matching and Notification diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 602f8ff212f2..2f2f50322ffb 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1024,6 +1024,9 @@ static void acpi_device_get_busid(struct acpi_device *device) case ACPI_BUS_TYPE_SLEEP_BUTTON: strcpy(device->pnp.bus_id, "SLPF"); break; + case ACPI_BUS_TYPE_ECDT_EC: + strcpy(device->pnp.bus_id, "ECDT"); + break; default: acpi_get_name(device->handle, ACPI_SINGLE_NAME, &buffer); /* Clean up trailing underscores (if any) */ @@ -1304,6 +1307,9 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, case ACPI_BUS_TYPE_SLEEP_BUTTON: acpi_add_id(pnp, ACPI_BUTTON_HID_SLEEPF); break; + case ACPI_BUS_TYPE_ECDT_EC: + acpi_add_id(pnp, ACPI_ECDT_HID); + break; } } @@ -2049,6 +2055,21 @@ void acpi_bus_trim(struct acpi_device *adev) } EXPORT_SYMBOL_GPL(acpi_bus_trim); +int acpi_bus_register_early_device(int type) +{ + struct acpi_device *device = NULL; + int result; + + result = acpi_add_single_object(&device, NULL, + type, ACPI_STA_DEFAULT); + if (result) + return result; + + device->flags.match_driver = true; + return device_attach(&device->dev); +} +EXPORT_SYMBOL_GPL(acpi_bus_register_early_device); + static int acpi_bus_scan_fixed(void) { int result = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index fa1505292f6c..324a04df3785 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -105,6 +105,7 @@ enum acpi_bus_device_type { ACPI_BUS_TYPE_THERMAL, ACPI_BUS_TYPE_POWER_BUTTON, ACPI_BUS_TYPE_SLEEP_BUTTON, + ACPI_BUS_TYPE_ECDT_EC, ACPI_BUS_DEVICE_TYPE_COUNT }; diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 29c691265b49..14499757338f 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -58,6 +58,7 @@ #define ACPI_VIDEO_HID "LNXVIDEO" #define ACPI_BAY_HID "LNXIOBAY" #define ACPI_DOCK_HID "LNXDOCK" +#define ACPI_ECDT_HID "LNXEC" /* Quirk for broken IBM BIOSes */ #define ACPI_SMBUS_IBM_HID "SMBUSIBM" -- cgit v1.2.3 From f4359cedfb43b934f38c50d1604db21333abe57b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Nov 2017 16:03:01 +0100 Subject: drm/i915: Fix false-positive assert_rpm_wakelock_held in i915_pmic_bus_access_notifier v2 assert_rpm_wakelock_held is triggered from i915_pmic_bus_access_notifier even though it gets unregistered on (runtime) suspend, this is caused by a race happening under the following circumstances: intel_runtime_pm_put does: atomic_dec(&dev_priv->pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); And pm_runtime_put_autosuspend calls intel_runtime_suspend from a workqueue, so there is ample of time between the atomic_dec() and intel_runtime_suspend() unregistering the notifier. If the notifier gets called in this windowd assert_rpm_wakelock_held falsely triggers (at this point we're not runtime-suspended yet). This commit adds disable_rpm_wakeref_asserts and enable_rpm_wakeref_asserts calls around the intel_uncore_forcewake_get(FORCEWAKE_ALL) call in i915_pmic_bus_access_notifier fixing the false-positive WARN_ON. Changes in v2: -Reword comment explaining why disabling the wakeref asserts is ok and necessary Cc: stable@vger.kernel.org Reported-by: FKr Reviewed-by: Imre Deak Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171110150301.9601-2-hdegoede@redhat.com (cherry picked from commit ce30560c80dead91e98a03d90fb8791e57a9b69d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_uncore.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 20e3c65c0999..cb48c8b92aa3 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1240,8 +1240,15 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, * bus, which will be busy after this notification, leading to: * "render: timed out waiting for forcewake ack request." * errors. + * + * The notifier is unregistered during intel_runtime_suspend(), + * so it's ok to access the HW here without holding a RPM + * wake reference -> disable wakeref asserts for the time of + * the access. */ + disable_rpm_wakeref_asserts(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + enable_rpm_wakeref_asserts(dev_priv); break; case MBI_PMIC_BUS_ACCESS_END: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); -- cgit v1.2.3 From 294cf1af8cf2eb0d1eced377fdfb9a2d3f0e8b42 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Nov 2017 14:55:17 +0100 Subject: drm/i915: Re-register PMIC bus access notifier on runtime resume intel_uncore_suspend() unregisters the uncore code's PMIC bus access notifier and gets called on both normal and runtime suspend. intel_uncore_resume_early() re-registers the notifier, but only on normal resume. Add a new intel_uncore_runtime_resume() function which only re-registers the notifier and call that on runtime resume. Cc: stable@vger.kernel.org Reported-by: Imre Deak Reviewed-by: Imre Deak Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171114135518.15981-2-hdegoede@redhat.com (cherry picked from commit bedf4d79c3654921839b62246b0965ddb308b201) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/intel_uncore.c | 6 ++++++ drivers/gpu/drm/i915/intel_uncore.h | 1 + 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3db5851756f0..34191028bbad 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2618,6 +2618,8 @@ static int intel_runtime_resume(struct device *kdev) ret = vlv_resume_prepare(dev_priv, true); } + intel_uncore_runtime_resume(dev_priv); + /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index cb48c8b92aa3..8c2ce81f01c2 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -434,6 +434,12 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); } +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv) +{ + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); +} + void intel_uncore_sanitize(struct drm_i915_private *dev_priv) { i915_modparams.enable_rc6 = diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 582771251b57..9ce079b5dd0d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -134,6 +134,7 @@ bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv void intel_uncore_fini(struct drm_i915_private *dev_priv); void intel_uncore_suspend(struct drm_i915_private *dev_priv); void intel_uncore_resume_early(struct drm_i915_private *dev_priv); +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv); u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); -- cgit v1.2.3 From dcd1d8302a06b0a0e5d6f9b6851fa4a5579e79b0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 16:55:34 +0100 Subject: drm/i915/gvt: ensure -ve return value is handled correctly An earlier fix changed the return type from find_bb_size however the integer return is being assigned to a unsigned int so the -ve error check will never be detected. Make bb_size an int to fix this. Detected by CoverityScan CID#1456886 ("Unsigned compared against 0") Fixes: 1e3197d6ad73 ("drm/i915/gvt: Refine error handling for perform_bb_shadow") Signed-off-by: Colin Ian King Signed-off-by: Zhenyu Wang (cherry picked from commit 24f8a29af4afe7c53e08f4afa0c3fa9eb3791b89) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 701a3c6f1669..85d4c57870fb 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1628,7 +1628,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; unsigned long gma = 0; - uint32_t bb_size; + int bb_size; void *dst = NULL; int ret = 0; -- cgit v1.2.3 From 6e068270b7883836e4717e262d96d1c43690862a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 12:14:58 +0000 Subject: drm/i915: Clear breadcrumb node when cancelling signaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we call intel_engine_cancel_signaling() to stop reporting when a request is completed via an asynchronous signal, we remove that request from the breadcrumb wait queue. However, we may be concurrently processing that request in the signaler itself, the actual operations on the request's node itself are serialised but we do not actually clear the waiter after removing it from the tree allowing both parties to attempt to do so and corrupting the rbtree. (Previously removing from the breadcrumb wait queue could only be done on behalf of i915_wait_request, so this race could not happen). Reported-by: "He, Bo" Fixes: 9eb143bbec7d ("drm/i915: Allow a request to be cancelled") Signed-off-by: Chris Wilson Cc: "He, Bo" Cc: Tvrtko Ursulin Cc: Michał Winiarski Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171115121458.24655-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit c534612e780c4a2c8ef5bfc11583c7d58436baca) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 48e1ba01ccf8..5f8b9f1f40f1 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -517,6 +517,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); + RB_CLEAR_NODE(&wait->node); out: GEM_BUG_ON(b->irq_wait == wait); -- cgit v1.2.3 From 457db89b538ea4eb2a188c75f8f3a83469395ee0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 17:35:20 +0000 Subject: drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 21cc6431e0c2 ("drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM") tried to fixup the check_flush_dependency warning for hitting i915_gem_userptr_mn_invalidate_range_start from within the shrinker, but I failed to notice userptr has 2 similarly named workqueues. I marked up i915-userptr-acquire as WQ_MEM_RECLAIM whereas we only wait upon i915-userptr-release from inside the reclaim paths. [62530.869510] workqueue: PF_MEMALLOC task 7983(gem_shrink) is flushing !WQ_MEM_RECLAIM i915-userptr-release: (null) [62530.869515] ------------[ cut here ]------------ [62530.869519] WARNING: CPU: 1 PID: 7983 at kernel/workqueue.c:2434 check_flush_dependency+0x7f/0x110 [62530.869519] Modules linked in: pegasus mii ip6table_filter ip6_tables bnep iptable_filter snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic binfmt_misc nls_iso8859_1 intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_intel snd_hda_codec kvm_intel snd_hda_core snd_hwdep kvm snd_pcm irqbypass snd_seq_midi snd_seq_midi_event snd_rawmidi crct10dif_pclmul crc32_pclmul 8250_dw ghash_clmulni_intel snd_seq pcbc snd_seq_device snd_timer btusb aesni_intel btrtl btbcm aes_x86_64 iwlwifi btintel crypto_simd glue_helper cryptd bluetooth snd intel_cstate input_leds idma64 intel_rapl_perf ecdh_generic serio_raw soundcore cfg80211 wmi_bmof virt_dma intel_lpss_pci intel_lpss acpi_als kfifo_buf industrialio winbond_cir soc_button_array rc_core spidev tpm_crb intel_hid acpi_pad mac_hid sparse_keymap [62530.869546] parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid i915 i2c_algo_bit prime_numbers drm_kms_helper syscopyarea e1000e sysfillrect sysimgblt fb_sys_fops ahci ptp pps_core libahci drm wmi video i2c_hid hid [62530.869557] CPU: 1 PID: 7983 Comm: gem_shrink Tainted: G U W L 4.14.0-rc8-drm-tip-ww45-commit-1342299+ #1 [62530.869558] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake H DDR4 RVP, BIOS CNLSFWR1.R00.X098.A00.1707301945 07/30/2017 [62530.869559] task: ffffa1049dbeec80 task.stack: ffffae7d05c44000 [62530.869560] RIP: 0010:check_flush_dependency+0x7f/0x110 [62530.869561] RSP: 0018:ffffae7d05c473a0 EFLAGS: 00010286 [62530.869562] RAX: 000000000000006e RBX: ffffa1049540f400 RCX: ffffffffa3e55788 [62530.869562] RDX: 0000000000000000 RSI: 0000000000000092 RDI: 0000000000000202 [62530.869563] RBP: ffffae7d05c473c0 R08: 000000000000006e R09: 000000000038bb0e [62530.869563] R10: 0000000000000000 R11: 000000000000006e R12: ffffa1049dbeec80 [62530.869564] R13: 0000000000000000 R14: 0000000000000000 R15: ffffae7d05c473e0 [62530.869565] FS: 00007f621b129880(0000) GS:ffffa1050b240000(0000) knlGS:0000000000000000 [62530.869566] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [62530.869566] CR2: 00007f6214400000 CR3: 0000000353a17003 CR4: 00000000003606e0 [62530.869567] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [62530.869567] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [62530.869568] Call Trace: [62530.869570] flush_workqueue+0x115/0x3d0 [62530.869573] ? wake_up_process+0x15/0x20 [62530.869596] i915_gem_userptr_mn_invalidate_range_start+0x12f/0x160 [i915] [62530.869614] ? i915_gem_userptr_mn_invalidate_range_start+0x12f/0x160 [i915] [62530.869616] __mmu_notifier_invalidate_range_start+0x55/0x80 [62530.869618] try_to_unmap_one+0x791/0x8b0 [62530.869620] ? call_rwsem_down_read_failed+0x18/0x30 [62530.869622] rmap_walk_anon+0x10b/0x260 [62530.869624] rmap_walk+0x48/0x60 [62530.869625] try_to_unmap+0x93/0xf0 [62530.869626] ? page_remove_rmap+0x2a0/0x2a0 [62530.869627] ? page_not_mapped+0x20/0x20 [62530.869629] ? page_get_anon_vma+0x90/0x90 [62530.869630] ? invalid_mkclean_vma+0x20/0x20 [62530.869631] migrate_pages+0x946/0xaa0 [62530.869633] ? __ClearPageMovable+0x10/0x10 [62530.869635] ? isolate_freepages_block+0x3c0/0x3c0 [62530.869636] compact_zone+0x22f/0x970 [62530.869638] compact_zone_order+0xa3/0xd0 [62530.869640] try_to_compact_pages+0x1a5/0x2a0 [62530.869641] ? try_to_compact_pages+0x1a5/0x2a0 [62530.869643] __alloc_pages_direct_compact+0x50/0x110 [62530.869644] __alloc_pages_slowpath+0x4da/0xf30 [62530.869646] __alloc_pages_nodemask+0x262/0x280 [62530.869648] alloc_pages_vma+0x165/0x1e0 [62530.869649] shmem_alloc_hugepage+0xd0/0x130 [62530.869651] ? __radix_tree_insert+0x45/0x230 [62530.869652] ? __vm_enough_memory+0x29/0x130 [62530.869654] shmem_alloc_and_acct_page+0x10d/0x1e0 [62530.869655] shmem_getpage_gfp+0x426/0xc00 [62530.869657] shmem_fault+0xa0/0x1e0 [62530.869659] ? file_update_time+0x60/0x110 [62530.869660] __do_fault+0x1e/0xc0 [62530.869661] __handle_mm_fault+0xa35/0x1170 [62530.869662] handle_mm_fault+0xcc/0x1c0 [62530.869664] __do_page_fault+0x262/0x4f0 [62530.869666] do_page_fault+0x2e/0xe0 [62530.869667] page_fault+0x22/0x30 [62530.869668] RIP: 0033:0x404335 [62530.869669] RSP: 002b:00007fff7829e420 EFLAGS: 00010216 [62530.869670] RAX: 00007f6210400000 RBX: 0000000000000004 RCX: 0000000000b80000 [62530.869670] RDX: 0000000000002e01 RSI: 0000000000008000 RDI: 0000000000000004 [62530.869671] RBP: 0000000000000019 R08: 0000000000000002 R09: 0000000000000000 [62530.869671] R10: 0000000000000559 R11: 0000000000000246 R12: 0000000008000000 [62530.869672] R13: 00000000004042f0 R14: 0000000000000004 R15: 000000000000007e [62530.869673] Code: 00 8b b0 18 05 00 00 48 8d 8b b0 00 00 00 48 8d 90 c0 06 00 00 4d 89 f0 48 c7 c7 40 c0 c8 a3 c6 05 68 c5 e8 00 01 e8 c2 68 04 00 <0f> ff 4d 85 ed 74 18 49 8b 45 20 48 8b 70 08 8b 86 00 01 00 00 [62530.869691] ---[ end trace 01e01ad0ff5781f8 ]--- Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103739 Fixes: 21cc6431e0c2 ("drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM") Signed-off-by: Chris Wilson Cc: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171114173520.8829-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld (cherry picked from commit 41729bf2248bc8593e5103d43974079cc269524c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_userptr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e26b23171b56..ccde12d9e5f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -172,7 +172,9 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; mn->objects = RB_ROOT_CACHED; - mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); + mn->wq = alloc_workqueue("i915-userptr-release", + WQ_UNBOUND | WQ_MEM_RECLAIM, + 0); if (mn->wq == NULL) { kfree(mn); return ERR_PTR(-ENOMEM); @@ -827,7 +829,7 @@ int i915_gem_init_userptr(struct drm_i915_private *dev_priv) dev_priv->mm.userptr_wq = alloc_workqueue("i915-userptr-acquire", - WQ_HIGHPRI | WQ_MEM_RECLAIM, + WQ_HIGHPRI | WQ_UNBOUND, 0); if (!dev_priv->mm.userptr_wq) return -ENOMEM; -- cgit v1.2.3 From 3572f04c69ed4369da5d3c65d84fb18774aa60b6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 16 Nov 2017 18:02:15 +0200 Subject: drm/i915: Fix init_clock_gating for resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving the init_clock_gating() call from intel_modeset_init_hw() to intel_modeset_gem_init() had an unintended effect of not applying some workarounds on resume. This, for example, cause some kind of corruption to appear at the top of my IVB Thinkpad X1 Carbon LVDS screen after hibernation. Fix the problem by explicitly calling init_clock_gating() from the resume path. I really hope this doesn't break something else again. At least the problems reported at https://bugs.freedesktop.org/show_bug.cgi?id=103549 didn't make a comeback, even after a hibernate cycle. v2: Reorder the init_clock_gating vs. modeset_init_hw to match the display reset path (Rodrigo) Cc: stable@vger.kernel.org Cc: Chris Wilson Cc: Rodrigo Vivi Fixes: 6ac43272768c ("drm/i915: Move init_clock_gating() back to where it was") Reviewed-by: Rodrigo Vivi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171116160215.25715-1-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit 675f7ff35bd256e65d3d0f52718d8babf5d1002a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 34191028bbad..7d9b07df32fa 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1714,6 +1714,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_guc_resume(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) -- cgit v1.2.3 From 0d307935fefa6389eb726c6362351c162c949101 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 15 Nov 2017 21:17:55 +0000 Subject: cpufreq: Add Loongson machine dependencies The MIPS loongson cpufreq drivers don't build unless configured for the correct machine type, due to dependency on machine specific architecture headers and symbols in machine specific platform code. More specifically loongson1-cpufreq.c uses RST_CPU_EN and RST_CPU, neither of which is defined in asm/mach-loongson32/regs-clk.h unless CONFIG_LOONGSON1_LS1B=y, and loongson2_cpufreq.c references loongson2_clockmod_table[], which is only defined in arch/mips/loongson64/lemote-2f/clock.c, i.e. when CONFIG_LEMOTE_MACH2F=y. Add these dependencies to Kconfig to avoid randconfig / allyesconfig build failures (e.g. when based on BMIPS which also has a cpufreq driver). Signed-off-by: James Hogan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 4ebae43118ef..d8addbce40bc 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -275,6 +275,7 @@ config BMIPS_CPUFREQ config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" + depends on LEMOTE_MACH2F help This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. @@ -287,6 +288,7 @@ config LOONGSON2_CPUFREQ config LOONGSON1_CPUFREQ tristate "Loongson1 CPUFreq Driver" + depends on LOONGSON1_LS1B help This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. -- cgit v1.2.3 From 7e8a09e05a0f0ccaf6b2a16f12ed6edc8e62c47d Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 13:32:01 -0800 Subject: cpufreq: mediatek: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/cpufreq/mediatek-cpufreq.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/mediatek-cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 18c4bd9a5c65..e0d5090b303d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -620,3 +620,7 @@ static int __init mtk_cpufreq_driver_init(void) return 0; } device_initcall(mtk_cpufreq_driver_init); + +MODULE_DESCRIPTION("MediaTek CPUFreq driver"); +MODULE_AUTHOR("Pi-Cheng Chen "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 1addb798e93893d33c8dfab743cd44f09fd7719a Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 8 Nov 2017 17:29:44 +0100 Subject: null_blk: fix dev->badblocks leak null_alloc_dev() allocates memory for dev->badblocks, but cleanup currently only occurs in the configfs release codepath, missing a number of other places. This bug was found running the blktests block/010 test, alongside kmemleak: rapido1:/blktests# ./check block/010 ... rapido1:/blktests# echo scan > /sys/kernel/debug/kmemleak [ 306.966708] kmemleak: 32 new suspected memory leaks (see /sys/kernel/debug/kmemleak) rapido1:/blktests# cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88001f86d000 (size 4096): comm "modprobe", pid 231, jiffies 4294892415 (age 318.252s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0x9f/0xe0 [] badblocks_init+0x2f/0x60 [] 0xffffffffa0019fae [] nullb_device_badblocks_store+0x63/0x130 [null_blk] [] do_one_initcall+0x3d/0x170 [] do_init_module+0x56/0x1e9 [] load_module+0x1c47/0x26a0 [] SyS_finit_module+0xa9/0xd0 [] entry_SYSCALL_64_fastpath+0x13/0x94 Fixes: 2f54a613c942 ("nullb: badbblocks support") Reviewed-by: Shaohua Li Signed-off-by: David Disseldorp Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index c61960deb74a..ccb9975a97fa 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -471,7 +471,6 @@ static void nullb_device_release(struct config_item *item) { struct nullb_device *dev = to_nullb_device(item); - badblocks_exit(&dev->badblocks); null_free_device_storage(dev, false); null_free_dev(dev); } @@ -582,6 +581,10 @@ static struct nullb_device *null_alloc_dev(void) static void null_free_dev(struct nullb_device *dev) { + if (!dev) + return; + + badblocks_exit(&dev->badblocks); kfree(dev); } -- cgit v1.2.3 From f341a4d384f7fced2ca0d9472ed88fe94de32726 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 22 Nov 2017 13:18:05 -0500 Subject: block: remove useless assignment in bio_split Remove useless assignment to the variable "split" because the variable is unconditionally assigned later. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 228229f3bb76..8bfdea58159b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1819,7 +1819,7 @@ EXPORT_SYMBOL(bio_endio); struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { - struct bio *split = NULL; + struct bio *split; BUG_ON(sectors <= 0); BUG_ON(sectors >= bio_sectors(bio)); -- cgit v1.2.3 From ded13fc11b71fd1351e57c68a130d89a0285f1b6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 22 Nov 2017 14:38:53 +1100 Subject: KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts This fixes two errors that prevent a guest using the HPT MMU from successfully migrating to a POWER9 host in radix MMU mode, or resizing its HPT when running on a radix host. The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page size information", 2017-09-11) missed two uses of hpte_base_page_size(), one in the HPT rehashing code and one in kvm_htab_write() (which is used on the destination side in migrating a HPT guest). Instead we use kvmppc_hpte_base_page_shift(). Having the shift count means that we can use left and right shifts instead of multiplication and division in a few places. Along the way, this adds a check in kvm_htab_write() to ensure that the page size encoding in the incoming HPTEs is recognized, and if not return an EINVAL error to userspace. The second bug was that kvm_htab_write was performing some but not all of the functions of kvmhv_setup_mmu(), resulting in the destination VM being left in radix mode as far as the hardware is concerned. The simplest fix for now is make kvm_htab_write() call kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does. In future it would be better to refactor the code more extensively to remove the duplication. Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page size information") Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather than SDR1 on POWER9") Reported-by: Suraj Jitindar Singh Tested-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 +++++++++++++++++++++++-------------- arch/powerpc/kvm/book3s_hv.c | 3 +-- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 96753f3aac6d..941c2a3f231b 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct iommu_group *grp); extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm); extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); +extern void kvmppc_setup_partition_table(struct kvm *kvm); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 235319c2574e..966097232d21 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1238,8 +1238,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, unsigned long vpte, rpte, guest_rpte; int ret; struct revmap_entry *rev; - unsigned long apsize, psize, avpn, pteg, hash; + unsigned long apsize, avpn, pteg, hash; unsigned long new_idx, new_pteg, replace_vpte; + int pshift; hptep = (__be64 *)(old->virt + (idx << 4)); @@ -1298,8 +1299,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, goto out; rpte = be64_to_cpu(hptep[1]); - psize = hpte_base_page_size(vpte, rpte); - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); + pshift = kvmppc_hpte_base_page_shift(vpte, rpte); + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23); pteg = idx / HPTES_PER_GROUP; if (vpte & HPTE_V_SECONDARY) pteg = ~pteg; @@ -1311,20 +1312,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, offset = (avpn & 0x1f) << 23; vsid = avpn >> 5; /* We can find more bits from the pteg value */ - if (psize < (1ULL << 23)) - offset |= ((vsid ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (offset / psize); + hash = vsid ^ (offset >> pshift); } else { unsigned long offset, vsid; /* We only have 40 - 23 bits of seg_off in avpn */ offset = (avpn & 0x1ffff) << 23; vsid = avpn >> 17; - if (psize < (1ULL << 23)) - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (vsid << 25) ^ (offset / psize); + hash = vsid ^ (vsid << 25) ^ (offset >> pshift); } new_pteg = hash & new_hash_mask; @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, ssize_t nb; long int err, ret; int mmu_ready; + int pshift; if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; + pshift = kvmppc_hpte_base_page_shift(v, r); + if (pshift <= 0) + goto out; lbuf += 2; nb += HPTE_SIZE; @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!mmu_ready && is_vrma_hpte(v)) { - unsigned long psize = hpte_base_page_size(v, r); - unsigned long senc = slb_pgsize_encoding(psize); - unsigned long lpcr; + unsigned long senc, lpcr; + senc = slb_pgsize_encoding(1ul << pshift); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr = senc << (LPCR_VRMASD_SH - 4); - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, + LPCR_VRMASD); + } else { + kvmppc_setup_partition_table(kvm); + } mmu_ready = 1; } ++i; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 79ea3d9269db..2d46037ce936 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); -static void kvmppc_setup_partition_table(struct kvm *kvm); static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, int *ip) @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) return; } -static void kvmppc_setup_partition_table(struct kvm *kvm) +void kvmppc_setup_partition_table(struct kvm *kvm) { unsigned long dw0, dw1; -- cgit v1.2.3 From 8c97eeccf0ad8783c057830119467b877bdfced7 Mon Sep 17 00:00:00 2001 From: Jeff Lien Date: Tue, 21 Nov 2017 10:44:37 -0600 Subject: nvme-pci: add quirk for delay before CHK RDY for WDC SN200 And increase the existing delay to cover this device as well. Cc: stable@vger.kernel.org Signed-off-by: Jeff Lien Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/pci.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c0873a68872f..ea1aa5283e8e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -114,7 +114,7 @@ static inline struct nvme_request *nvme_req(struct request *req) * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was * found empirically. */ -#define NVME_QUIRK_DELAY_AMOUNT 2000 +#define NVME_QUIRK_DELAY_AMOUNT 2300 enum nvme_ctrl_state { NVME_CTRL_NEW, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 58dbe684007b..617374762b7c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2673,6 +2673,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ -- cgit v1.2.3 From b8a3365a30c463e9105969ab1bf8674b763e3408 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 23 Nov 2017 11:37:37 +0100 Subject: drm/vblank: Pass crtc_id to page_flip_ioctl. We added crtc_id to the atomic ioctl, but forgot to add it for vblank and page flip events. Commit bd386e518056 ("drm: Reorganize drm_pending_event to support future event types [v2]") added it to the vblank event, but page flip event was still missing. Correct this and add a test for making sure we always set crtc_id correctly. Fixes: bd386e518056 ("drm: Reorganize drm_pending_event to support future event types [v2]") Fixes: 5db06a8a98f5 ("drm: Pass CRTC ID in userspace vblank events") Cc: Daniel Stone Cc: Daniel Vetter Cc: Gustavo Padovan Cc: Sean Paul Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Reviewed-by: Daniel Vetter #irc Testcase: igt/kms_vblank/crtc_id Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171123103737.47138-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/drm_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 19404e34cd59..37a93cdffb4a 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1030,6 +1030,7 @@ retry: e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); e->event.vbl.user_data = page_flip->user_data; + e->event.vbl.crtc_id = crtc->base.id; ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); -- cgit v1.2.3 From 12841f87b7a8ceb3d54f171660f72a86941bfcb3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 23 Nov 2017 09:08:57 +0530 Subject: cxl: Check if vphb exists before iterating over AFU devices During an eeh a kernel-oops is reported if no vPHB is allocated to the AFU. This happens as during AFU init, an error in creation of vPHB is a non-fatal error. Hence afu->phb should always be checked for NULL before iterating over it for the virtual AFU pci devices. This patch fixes the kenel-oops by adding a NULL pointer check for afu->phb before it is dereferenced. Fixes: 9e8df8a21963 ("cxl: EEH support") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Vaibhav Jain Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index bb7fd3f4edab..19969ee86d6f 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -2083,6 +2083,9 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, /* There should only be one entry, but go through the list * anyway */ + if (afu->phb == NULL) + return result; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (!afu_dev->driver) continue; @@ -2124,8 +2127,7 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, * Tell the AFU drivers; but we don't care what they * say, we're going away. */ - if (afu->phb != NULL) - cxl_vphb_error_detected(afu, state); + cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; } @@ -2265,6 +2267,9 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) if (cxl_afu_select_best_mode(afu)) goto err; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { /* Reset the device context. * TODO: make this less disruptive @@ -2327,6 +2332,9 @@ static void cxl_pci_resume(struct pci_dev *pdev) for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (afu_dev->driver && afu_dev->driver->err_handler && afu_dev->driver->err_handler->resume) -- cgit v1.2.3 From a3961f824cdbe7eb431254dc7d8f6f6767f474aa Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 22 Nov 2017 23:02:07 +0530 Subject: powerpc/powernv: Fix kexec crashes caused by tlbie tracing Rebooting into a new kernel with kexec fails in trace_tlbie() which is called from native_hpte_clear(). This happens if the running kernel has CONFIG_LOCKDEP enabled. With lockdep enabled, the tracepoints always execute few RCU checks regardless of whether tracing is on or off. We are already in the last phase of kexec sequence in real mode with HILE_BE set. At this point the RCU check ends up in RCU_LOCKDEP_WARN and causes kexec to fail. Fix this by not calling trace_tlbie() from native_hpte_clear(). mpe: It's not safe to call trace points at this point in the kexec path, even if we could avoid the RCU checks/warnings. The only solution is to not call them. Fixes: 0428491cba92 ("powerpc/mm: Trace tlbie(l) instructions") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Mahesh Salgaonkar Reported-by: Aneesh Kumar K.V Suggested-by: Michael Ellerman Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 3848af167df9..640cf566e986 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -47,7 +47,8 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +static inline unsigned long ___tlbie(unsigned long vpn, int psize, + int apsize, int ssize) { unsigned long va; unsigned int penc; @@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } - trace_tlbie(0, 0, va, 0, 0, 0, 0); + return va; +} + +static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + unsigned long rb; + + rb = ___tlbie(vpn, psize, apsize, ssize); + trace_tlbie(0, 0, rb, 0, 0, 0, 0); } static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) @@ -652,7 +661,7 @@ static void native_hpte_clear(void) if (hpte_v & HPTE_V_VALID) { hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn); hptep->v = 0; - __tlbie(vpn, psize, apsize, ssize); + ___tlbie(vpn, psize, apsize, ssize); } } -- cgit v1.2.3 From 34c089e806793a66e450b11bd167db6047399fcd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:27 +0200 Subject: mmc: block: Fix missing blk_put_request() Ensure blk_get_request() is paired with blk_put_request(). Fixes: 0493f6fe5bde ("mmc: block: Move boot partition locking into a driver op") Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ea80ff4cd7f9..f60939858586 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -236,6 +236,7 @@ static ssize_t power_ro_lock_store(struct device *dev, req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; + blk_put_request(req); if (!ret) { pr_info("%s: Locking boot partition ro until next power on\n", @@ -2557,6 +2558,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val) *val = ret; ret = 0; } + blk_put_request(req); return ret; } @@ -2587,6 +2589,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) req_to_mmc_queue_req(req)->drv_op_data = &ext_csd; blk_execute_rq(mq->queue, NULL, req, 0); err = req_to_mmc_queue_req(req)->drv_op_result; + blk_put_request(req); if (err) { pr_err("FAILED %d\n", err); goto out_free; -- cgit v1.2.3 From fb8e456e547ed2c699f64665bd8a3b9bde7b9728 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:28 +0200 Subject: mmc: block: Check return value of blk_get_request() blk_get_request() can fail, always check the return value. Fixes: 0493f6fe5bde ("mmc: block: Move boot partition locking into a driver op") Fixes: 3ecd8cf23f88 ("mmc: block: move multi-ioctl() to use block layer") Fixes: 614f0388f580 ("mmc: block: move single ioctl() commands to block requests") Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f60939858586..4a319ddbd956 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -233,6 +233,10 @@ static ssize_t power_ro_lock_store(struct device *dev, /* Dispatch locking to the block layer */ req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, __GFP_RECLAIM); + if (IS_ERR(req)) { + count = PTR_ERR(req); + goto out_put; + } req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; @@ -249,7 +253,7 @@ static ssize_t power_ro_lock_store(struct device *dev, set_disk_ro(part_md->disk, 1); } } - +out_put: mmc_blk_put(md); return count; } @@ -625,6 +629,10 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, req = blk_get_request(mq->queue, idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto cmd_done; + } idatas[0] = idata; req_to_mmc_queue_req(req)->drv_op = rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; @@ -692,6 +700,10 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, req = blk_get_request(mq->queue, idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto cmd_err; + } req_to_mmc_queue_req(req)->drv_op = rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; req_to_mmc_queue_req(req)->drv_op_data = idata; @@ -2551,6 +2563,8 @@ static int mmc_dbg_card_status_get(void *data, u64 *val) /* Ask the block layer about the card status */ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) + return PTR_ERR(req); req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; @@ -2585,6 +2599,10 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) /* Ask the block layer for the EXT CSD */ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out_free; + } req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD; req_to_mmc_queue_req(req)->drv_op_data = &ext_csd; blk_execute_rq(mq->queue, NULL, req, 0); -- cgit v1.2.3 From ebe7dd45cf49e3b49cacbaace17f9f878f21fbea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:29 +0200 Subject: mmc: core: Do not leave the block driver in a suspended state The block driver must be resumed if the mmc bus fails to suspend the card. Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index a4b49e25fe96..7586ff2ad1f1 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -157,6 +157,9 @@ static int mmc_bus_suspend(struct device *dev) return ret; ret = host->bus_ops->suspend(host); + if (ret) + pm_generic_resume(dev); + return ret; } -- cgit v1.2.3 From f9f0da98819503b06b35e61869d18cf3a8cd3323 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:30 +0200 Subject: mmc: block: Ensure that debugfs files are removed The card is not necessarily being removed, but the debugfs files must be removed when the driver is removed, otherwise they will continue to exist after unbinding the card from the driver. e.g. # echo "mmc1:0001" > /sys/bus/mmc/drivers/mmcblk/unbind # cat /sys/kernel/debug/mmc1/mmc1\:0001/ext_csd [ 173.634584] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [ 173.643356] IP: mmc_ext_csd_open+0x5e/0x170 A complication is that the debugfs_root may have already been removed, so check for that too. Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 44 +++++++++++++++++++++++++++++++++++++------- drivers/mmc/core/debugfs.c | 1 + 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4a319ddbd956..ccfa98af1dd3 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -122,6 +122,10 @@ struct mmc_blk_data { struct device_attribute force_ro; struct device_attribute power_ro_lock; int area_type; + + /* debugfs files (only in main mmc_blk_data) */ + struct dentry *status_dentry; + struct dentry *ext_csd_dentry; }; /* Device type for RPMB character devices */ @@ -2653,7 +2657,7 @@ static const struct file_operations mmc_dbg_ext_csd_fops = { .llseek = default_llseek, }; -static int mmc_blk_add_debugfs(struct mmc_card *card) +static int mmc_blk_add_debugfs(struct mmc_card *card, struct mmc_blk_data *md) { struct dentry *root; @@ -2663,28 +2667,53 @@ static int mmc_blk_add_debugfs(struct mmc_card *card) root = card->debugfs_root; if (mmc_card_mmc(card) || mmc_card_sd(card)) { - if (!debugfs_create_file("status", S_IRUSR, root, card, - &mmc_dbg_card_status_fops)) + md->status_dentry = + debugfs_create_file("status", S_IRUSR, root, card, + &mmc_dbg_card_status_fops); + if (!md->status_dentry) return -EIO; } if (mmc_card_mmc(card)) { - if (!debugfs_create_file("ext_csd", S_IRUSR, root, card, - &mmc_dbg_ext_csd_fops)) + md->ext_csd_dentry = + debugfs_create_file("ext_csd", S_IRUSR, root, card, + &mmc_dbg_ext_csd_fops); + if (!md->ext_csd_dentry) return -EIO; } return 0; } +static void mmc_blk_remove_debugfs(struct mmc_card *card, + struct mmc_blk_data *md) +{ + if (!card->debugfs_root) + return; + + if (!IS_ERR_OR_NULL(md->status_dentry)) { + debugfs_remove(md->status_dentry); + md->status_dentry = NULL; + } + + if (!IS_ERR_OR_NULL(md->ext_csd_dentry)) { + debugfs_remove(md->ext_csd_dentry); + md->ext_csd_dentry = NULL; + } +} #else -static int mmc_blk_add_debugfs(struct mmc_card *card) +static int mmc_blk_add_debugfs(struct mmc_card *card, struct mmc_blk_data *md) { return 0; } +static void mmc_blk_remove_debugfs(struct mmc_card *card, + struct mmc_blk_data *md) +{ +} + #endif /* CONFIG_DEBUG_FS */ static int mmc_blk_probe(struct mmc_card *card) @@ -2724,7 +2753,7 @@ static int mmc_blk_probe(struct mmc_card *card) } /* Add two debugfs entries */ - mmc_blk_add_debugfs(card); + mmc_blk_add_debugfs(card, md); pm_runtime_set_autosuspend_delay(&card->dev, 3000); pm_runtime_use_autosuspend(&card->dev); @@ -2750,6 +2779,7 @@ static void mmc_blk_remove(struct mmc_card *card) { struct mmc_blk_data *md = dev_get_drvdata(&card->dev); + mmc_blk_remove_debugfs(card, md); mmc_blk_remove_parts(card, md); pm_runtime_get_sync(&card->dev); mmc_claim_host(card->host); diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 01e459a34f33..0f4a7d7b2626 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -314,4 +314,5 @@ err: void mmc_remove_card_debugfs(struct mmc_card *card) { debugfs_remove_recursive(card->debugfs_root); + card->debugfs_root = NULL; } -- cgit v1.2.3 From 52884f8f66c893e92a73787b5bef32b2ef52e1bc Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 20 Nov 2017 11:56:47 -0800 Subject: mmc: sdhci-msm: Optionally wait for signal level changes Not all instances of the SDCC core supports changing signal voltage and as such will not generate a power interrupt when the software attempts to change the voltage. This results in probing the eMMC on some devices to take over 2 minutes. Check that the SWITCHABLE_SIGNALING_VOLTAGE bit in MCI_GENERICS is set before waiting for the power interrupt. Cc: Sahitya Tummala Cc: Vijay Viswanath Fixes: c0309b3803fe ("mmc: sdhci-msm: Add sdhci msm register write APIs which wait for pwr irq") Signed-off-by: Bjorn Andersson Tested-by: Luca Weiss Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 3fb7d2eec93f..c283291db705 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -29,6 +29,9 @@ #define CORE_VERSION_MAJOR_MASK (0xf << CORE_VERSION_MAJOR_SHIFT) #define CORE_VERSION_MINOR_MASK 0xff +#define CORE_MCI_GENERICS 0x70 +#define SWITCHABLE_SIGNALING_VOLTAGE BIT(29) + #define CORE_HC_MODE 0x78 #define HC_MODE_EN 0x1 #define CORE_POWER 0x0 @@ -1028,11 +1031,22 @@ static void sdhci_msm_check_power_status(struct sdhci_host *host, u32 req_type) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); bool done = false; + u32 val; pr_debug("%s: %s: request %d curr_pwr_state %x curr_io_level %x\n", mmc_hostname(host->mmc), __func__, req_type, msm_host->curr_pwr_state, msm_host->curr_io_level); + /* + * The power interrupt will not be generated for signal voltage + * switches if SWITCHABLE_SIGNALING_VOLTAGE in MCI_GENERICS is not set. + */ + val = readl(msm_host->core_mem + CORE_MCI_GENERICS); + if ((req_type & REQ_IO_HIGH || req_type & REQ_IO_LOW) && + !(val & SWITCHABLE_SIGNALING_VOLTAGE)) { + return; + } + /* * The IRQ for request type IO High/LOW will be generated when - * there is a state change in 1.8V enable bit (bit 3) of -- cgit v1.2.3 From 33d22c2ed6dbe1199fc78e84549b1117fd652c01 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 23 Nov 2017 12:12:17 +1000 Subject: drm/ttm: don't attempt to use hugepages if dma32 requested (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit below introduced thp support for ttm allocations, however it didn't take into account the case where dma32 was requested. Some drivers always request dma32, and the bochs driver is one of those. This fixes an oops: [ 30.108507] ------------[ cut here ]------------ [ 30.108920] kernel BUG at ./include/linux/gfp.h:408! [ 30.109356] invalid opcode: 0000 [#1] SMP [ 30.109700] Modules linked in: fuse nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack devlink ip_set nfnetlink ebtable_nat ebtable_broute bridge ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security ebtable_filter ebtables ip6table_filter ip6_tables snd_hda_codec_generic kvm_intel kvm snd_hda_intel snd_hda_codec irqbypass ppdev snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm bochs_drm ttm joydev drm_kms_helper virtio_balloon snd_timer snd parport_pc drm soundcore parport i2c_piix4 nls_utf8 isofs squashfs zstd_decompress xxhash 8021q garp mrp stp llc virtio_net [ 30.115605] virtio_console virtio_scsi crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel serio_raw virtio_pci virtio_ring virtio ata_generic pata_acpi qemu_fw_cfg sunrpc scsi_transport_iscsi loop [ 30.117425] CPU: 0 PID: 1347 Comm: gnome-shell Not tainted 4.15.0-0.rc0.git6.1.fc28.x86_64 #1 [ 30.118141] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 30.118866] task: ffff923a77e03380 task.stack: ffffa78182228000 [ 30.119366] RIP: 0010:__alloc_pages_nodemask+0x35e/0x430 [ 30.119810] RSP: 0000:ffffa7818222bba8 EFLAGS: 00010202 [ 30.120250] RAX: 0000000000000001 RBX: 00000000014382c6 RCX: 0000000000000006 [ 30.120840] RDX: 0000000000000000 RSI: 0000000000000009 RDI: 0000000000000000 [ 30.121443] RBP: ffff923a760d6000 R08: 0000000000000000 R09: 0000000000000006 [ 30.122039] R10: 0000000000000040 R11: 0000000000000300 R12: ffff923a729273c0 [ 30.122629] R13: 0000000000000000 R14: 0000000000000000 R15: ffff923a7483d400 [ 30.123223] FS: 00007fe48da7dac0(0000) GS:ffff923a7cc00000(0000) knlGS:0000000000000000 [ 30.123896] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 30.124373] CR2: 00007fe457b73000 CR3: 0000000078313000 CR4: 00000000000006f0 [ 30.124968] Call Trace: [ 30.125186] ttm_pool_populate+0x19b/0x400 [ttm] [ 30.125578] ttm_bo_vm_fault+0x325/0x570 [ttm] [ 30.125964] __do_fault+0x19/0x11e [ 30.126255] __handle_mm_fault+0xcd3/0x1260 [ 30.126609] handle_mm_fault+0x14c/0x310 [ 30.126947] __do_page_fault+0x28c/0x530 [ 30.127282] do_page_fault+0x32/0x270 [ 30.127593] async_page_fault+0x22/0x30 [ 30.127922] RIP: 0033:0x7fe48aae39a8 [ 30.128225] RSP: 002b:00007ffc21c4d928 EFLAGS: 00010206 [ 30.128664] RAX: 00007fe457b73000 RBX: 000055cd4c1041a0 RCX: 00007fe457b73040 [ 30.129259] RDX: 0000000000300000 RSI: 0000000000000000 RDI: 00007fe457b73000 [ 30.129855] RBP: 0000000000000300 R08: 000000000000000c R09: 0000000100000000 [ 30.130457] R10: 0000000000000001 R11: 0000000000000246 R12: 000055cd4c1041a0 [ 30.131054] R13: 000055cd4bdfe990 R14: 000055cd4c104110 R15: 0000000000000400 [ 30.131648] Code: 11 01 00 0f 84 a9 00 00 00 65 ff 0d 6d cc dd 44 e9 0f ff ff ff 40 80 cd 80 e9 99 fe ff ff 48 89 c7 e8 e7 f6 01 00 e9 b7 fe ff ff <0f> 0b 0f ff e9 40 fd ff ff 65 48 8b 04 25 80 d5 00 00 8b 40 4c [ 30.133245] RIP: __alloc_pages_nodemask+0x35e/0x430 RSP: ffffa7818222bba8 [ 30.133836] ---[ end trace d4f1deb60784f40a ]--- v2: handle free path as well. Reported-by: Laura Abbott Reported-by: Adam Williamson Fixes: 0284f1ead87463bc17cf5e81a24fc65c052486f3 (drm/ttm: add transparent huge page support for cached allocations v2) Reviewed-by: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 316f831ad5f0..b0551aa677b8 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -744,12 +744,14 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, } #ifdef CONFIG_TRANSPARENT_HUGEPAGE - for (j = 0; j < HPAGE_PMD_NR; ++j) - if (p++ != pages[i + j]) - break; + if (!(flags & TTM_PAGE_FLAG_DMA32)) { + for (j = 0; j < HPAGE_PMD_NR; ++j) + if (p++ != pages[i + j]) + break; - if (j == HPAGE_PMD_NR) - order = HPAGE_PMD_ORDER; + if (j == HPAGE_PMD_NR) + order = HPAGE_PMD_ORDER; + } #endif if (page_count(pages[i]) != 1) @@ -865,20 +867,22 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags, i = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - while (npages >= HPAGE_PMD_NR) { - gfp_t huge_flags = gfp_flags; + if (!(gfp_flags & GFP_DMA32)) { + while (npages >= HPAGE_PMD_NR) { + gfp_t huge_flags = gfp_flags; - huge_flags |= GFP_TRANSHUGE; - huge_flags &= ~__GFP_MOVABLE; - huge_flags &= ~__GFP_COMP; - p = alloc_pages(huge_flags, HPAGE_PMD_ORDER); - if (!p) - break; + huge_flags |= GFP_TRANSHUGE; + huge_flags &= ~__GFP_MOVABLE; + huge_flags &= ~__GFP_COMP; + p = alloc_pages(huge_flags, HPAGE_PMD_ORDER); + if (!p) + break; - for (j = 0; j < HPAGE_PMD_NR; ++j) - pages[i++] = p++; + for (j = 0; j < HPAGE_PMD_NR; ++j) + pages[i++] = p++; - npages -= HPAGE_PMD_NR; + npages -= HPAGE_PMD_NR; + } } #endif -- cgit v1.2.3 From 612ea091fc77770d659b82ea44a1d5646e0af54c Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:03 +0800 Subject: blk-wbt: remove duplicated setting in wbt_init rwb->wc and rwb->queue_depth were overwritten by wbt_set_write_cache and wbt_set_queue_depth, remove the default setting. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index b252da0e4c11..ec903532a5d1 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -723,8 +723,6 @@ int wbt_init(struct request_queue *q) init_waitqueue_head(&rwb->rq_wait[i].wait); } - rwb->wc = 1; - rwb->queue_depth = RWB_DEF_DEPTH; rwb->last_comp = rwb->last_issue = jiffies; rwb->queue = q; rwb->win_nsec = RWB_WINDOW_NSEC; -- cgit v1.2.3 From f680474345f1fd6329d1806d6358066fd3b07f02 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:40 +0800 Subject: blk-sysfs: remove NULL pointer checking in queue_wb_lat_store wbt_init doesn't set q->rq_wb to NULL, if wbt_init return 0, so check return value is enough, remove NULL checking. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e54be402899d..870484eaed1f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -450,12 +450,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, ret = wbt_init(q); if (ret) return ret; - - rwb = q->rq_wb; - if (!rwb) - return -EINVAL; } + rwb = q->rq_wb; if (val == -1) rwb->min_lat_nsec = wbt_default_latency_nsec(q); else if (val >= 0) -- cgit v1.2.3 From 62d772fa9d86856d828cd24dd8fff5c83275c7e1 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:55 +0800 Subject: blk-wbt: move wbt_clear_stat to common place in wbt_done wbt_done call wbt_clear_stat no matter current stat was tracked or not, move it to common place. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index ec903532a5d1..19faecc5f8f6 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -178,12 +178,11 @@ void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) if (wbt_is_read(stat)) wb_timestamp(rwb, &rwb->last_comp); - wbt_clear_state(stat); } else { WARN_ON_ONCE(stat == rwb->sync_cookie); __wbt_done(rwb, wbt_stat_to_mask(stat)); - wbt_clear_state(stat); } + wbt_clear_state(stat); } /* -- cgit v1.2.3 From 3dfbdc44d69b2cd7e382fd084a5c860d2cea24f6 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:40:10 +0800 Subject: blk-wbt: fix comments typo Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 19faecc5f8f6..ae8de9780085 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -481,7 +481,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) /* * At this point we know it's a buffered write. If this is - * kswapd trying to free memory, or REQ_SYNC is set, set, then + * kswapd trying to free memory, or REQ_SYNC is set, then * it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing -- cgit v1.2.3 From 8e1fa89aa8bc2870009b4486644e4a58f2e2a4f5 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 10 Nov 2017 11:04:52 +0100 Subject: crypto: algif_aead - skip SGL entries with NULL page The TX SGL may contain SGL entries that are assigned a NULL page. This may happen if a multi-stage AIO operation is performed where the data for each stage is pointed to by one SGL entry. Upon completion of that stage, af_alg_pull_tsgl will assign NULL to the SGL entry. The NULL cipher used to copy the AAD from TX SGL to the destination buffer, however, cannot handle the case where the SGL starts with an SGL entry having a NULL page. Thus, the code needs to advance the start pointer into the SGL to the first non-NULL entry. This fixes a crash visible on Intel x86 32 bit using the libkcapi test suite. Cc: Fixes: 72548b093ee38 ("crypto: algif_aead - copy AAD from src to dst") Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index aacae0837aff..e2068b78993c 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -101,10 +101,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, struct aead_tfm *aeadc = pask->private; struct crypto_aead *tfm = aeadc->aead; struct crypto_skcipher *null_tfm = aeadc->null_tfm; - unsigned int as = crypto_aead_authsize(tfm); + unsigned int i, as = crypto_aead_authsize(tfm); struct af_alg_async_req *areq; - struct af_alg_tsgl *tsgl; - struct scatterlist *src; + struct af_alg_tsgl *tsgl, *tmp; + struct scatterlist *rsgl_src, *tsgl_src = NULL; int err = 0; size_t used = 0; /* [in] TX bufs to be en/decrypted */ size_t outlen = 0; /* [out] RX bufs produced by kernel */ @@ -178,7 +178,22 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, } processed = used + ctx->aead_assoclen; - tsgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list); + list_for_each_entry_safe(tsgl, tmp, &ctx->tsgl_list, list) { + for (i = 0; i < tsgl->cur; i++) { + struct scatterlist *process_sg = tsgl->sg + i; + + if (!(process_sg->length) || !sg_page(process_sg)) + continue; + tsgl_src = process_sg; + break; + } + if (tsgl_src) + break; + } + if (processed && !tsgl_src) { + err = -EFAULT; + goto free; + } /* * Copy of AAD from source to destination @@ -194,7 +209,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, */ /* Use the RX SGL as source (and destination) for crypto op. */ - src = areq->first_rsgl.sgl.sg; + rsgl_src = areq->first_rsgl.sgl.sg; if (ctx->enc) { /* @@ -207,7 +222,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, * v v * RX SGL: AAD || PT || Tag */ - err = crypto_aead_copy_sgl(null_tfm, tsgl->sg, + err = crypto_aead_copy_sgl(null_tfm, tsgl_src, areq->first_rsgl.sgl.sg, processed); if (err) goto free; @@ -225,7 +240,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, */ /* Copy AAD || CT to RX SGL buffer for in-place operation. */ - err = crypto_aead_copy_sgl(null_tfm, tsgl->sg, + err = crypto_aead_copy_sgl(null_tfm, tsgl_src, areq->first_rsgl.sgl.sg, outlen); if (err) goto free; @@ -257,11 +272,11 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, areq->tsgl); } else /* no RX SGL present (e.g. authentication only) */ - src = areq->tsgl; + rsgl_src = areq->tsgl; } /* Initialize the crypto operation */ - aead_request_set_crypt(&areq->cra_u.aead_req, src, + aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src, areq->first_rsgl.sgl.sg, used, ctx->iv); aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen); aead_request_set_tfm(&areq->cra_u.aead_req, tfm); -- cgit v1.2.3 From 7d2c3f54e6f646887d019faa45f35d6fe9fe82ce Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 10 Nov 2017 13:20:55 +0100 Subject: crypto: af_alg - remove locking in async callback The code paths protected by the socket-lock do not use or modify the socket in a non-atomic fashion. The actions pertaining the socket do not even need to be handled as an atomic operation. Thus, the socket-lock can be safely ignored. This fixes a bug regarding scheduling in atomic as the callback function may be invoked in interrupt context. In addition, the sock_hold is moved before the AIO encrypt/decrypt operation to ensure that the socket is always present. This avoids a tiny race window where the socket is unprotected and yet used by the AIO operation. Finally, the release of resources for a crypto operation is moved into a common function of af_alg_free_resources. Cc: Fixes: e870456d8e7c8 ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae43 ("crypto: algif_aead - overhaul memory management") Reported-by: Romain Izard Signed-off-by: Stephan Mueller Tested-by: Romain Izard Signed-off-by: Herbert Xu --- crypto/af_alg.c | 21 ++++++++++++++------- crypto/algif_aead.c | 23 ++++++++++++----------- crypto/algif_skcipher.c | 23 ++++++++++++----------- include/crypto/if_alg.h | 1 + 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 85cea9de324a..358749c38894 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1020,6 +1020,18 @@ unlock: } EXPORT_SYMBOL_GPL(af_alg_sendpage); +/** + * af_alg_free_resources - release resources required for crypto request + */ +void af_alg_free_resources(struct af_alg_async_req *areq) +{ + struct sock *sk = areq->sk; + + af_alg_free_areq_sgls(areq); + sock_kfree_s(sk, areq, areq->areqlen); +} +EXPORT_SYMBOL_GPL(af_alg_free_resources); + /** * af_alg_async_cb - AIO callback handler * @@ -1036,18 +1048,13 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) struct kiocb *iocb = areq->iocb; unsigned int resultlen; - lock_sock(sk); - /* Buffer size written by crypto operation. */ resultlen = areq->outlen; - af_alg_free_areq_sgls(areq); - sock_kfree_s(sk, areq, areq->areqlen); - __sock_put(sk); + af_alg_free_resources(areq); + sock_put(sk); iocb->ki_complete(iocb, err ? err : resultlen, 0); - - release_sock(sk); } EXPORT_SYMBOL_GPL(af_alg_async_cb); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index e2068b78993c..805f485ddf1b 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -283,12 +283,23 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) { /* AIO operation */ + sock_hold(sk); areq->iocb = msg->msg_iocb; aead_request_set_callback(&areq->cra_u.aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_async_cb, areq); err = ctx->enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) : crypto_aead_decrypt(&areq->cra_u.aead_req); + + /* AIO operation in progress */ + if (err == -EINPROGRESS || err == -EBUSY) { + /* Remember output size that will be generated. */ + areq->outlen = outlen; + + return -EIOCBQUEUED; + } + + sock_put(sk); } else { /* Synchronous operation */ aead_request_set_callback(&areq->cra_u.aead_req, @@ -300,19 +311,9 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, &ctx->wait); } - /* AIO operation in progress */ - if (err == -EINPROGRESS) { - sock_hold(sk); - - /* Remember output size that will be generated. */ - areq->outlen = outlen; - - return -EIOCBQUEUED; - } free: - af_alg_free_areq_sgls(areq); - sock_kfree_s(sk, areq, areq->areqlen); + af_alg_free_resources(areq); return err ? err : outlen; } diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 9954b078f0b9..30cff827dd8f 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -117,6 +117,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) { /* AIO operation */ + sock_hold(sk); areq->iocb = msg->msg_iocb; skcipher_request_set_callback(&areq->cra_u.skcipher_req, CRYPTO_TFM_REQ_MAY_SLEEP, @@ -124,6 +125,16 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, err = ctx->enc ? crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) : crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); + + /* AIO operation in progress */ + if (err == -EINPROGRESS || err == -EBUSY) { + /* Remember output size that will be generated. */ + areq->outlen = len; + + return -EIOCBQUEUED; + } + + sock_put(sk); } else { /* Synchronous operation */ skcipher_request_set_callback(&areq->cra_u.skcipher_req, @@ -136,19 +147,9 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, &ctx->wait); } - /* AIO operation in progress */ - if (err == -EINPROGRESS) { - sock_hold(sk); - - /* Remember output size that will be generated. */ - areq->outlen = len; - - return -EIOCBQUEUED; - } free: - af_alg_free_areq_sgls(areq); - sock_kfree_s(sk, areq, areq->areqlen); + af_alg_free_resources(areq); return err ? err : len; } diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 6abf0a3604dc..38d9c5861ed8 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -242,6 +242,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, unsigned int ivsize); ssize_t af_alg_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); +void af_alg_free_resources(struct af_alg_async_req *areq); void af_alg_async_cb(struct crypto_async_request *_req, int err); unsigned int af_alg_poll(struct file *file, struct socket *sock, poll_table *wait); -- cgit v1.2.3 From 2621e945fbf1d6df5f3f0ba7be5bae3d2cf9b6a5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 24 Nov 2017 14:51:02 +1100 Subject: powerpc/kexec: Fix kexec/kdump in P9 guest kernels The code that cleans up the IAMR/AMOR before kexec'ing failed to remember that when we're running as a guest AMOR is not writable, it's hypervisor privileged. They symptom is that the kexec stops before entering purgatory and nothing else is seen on the console. If you examine the state of the system all threads will be in the 0x700 program check handler. Fix it by making the write to AMOR dependent on HV mode. Fixes: 1e2a516e89fc ("powerpc/kexec: Fix radix to hash kexec due to IAMR/AMOR") Cc: stable@vger.kernel.org # v4.10+ Reported-by: Yilin Zhang Debugged-by: David Gibson Signed-off-by: Michael Ellerman Acked-by: Balbir Singh Reviewed-by: David Gibson Tested-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 8ac0bd2bddb0..3280953a82cf 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -623,7 +623,9 @@ BEGIN_FTR_SECTION * NOTE, we rely on r0 being 0 from above. */ mtspr SPRN_IAMR,r0 +BEGIN_FTR_SECTION_NESTED(42) mtspr SPRN_AMOR,r0 +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* save regs for local vars on new stack. -- cgit v1.2.3 From 26f4e759ef9b8a2bab1823d692ed6d56d40b66e3 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 23 Nov 2017 10:50:23 +0100 Subject: s390/disassembler: correct disassembly lines alignment 176.718956 Krnl Code: 00000000004d38b0: a54c0018 llihh %r4,24 176.718956 00000000004d38b4: b9080014 agr %r1,%r4 ^ Using a tab to align disassembly lines which follow the first line with "Krnl Code: " doesn't always work, e.g. if there is a prefix (timestamp or syslog prefix) which is not 8 chars aligned. Go back to alignment with spaces. Fixes: b192571d1ae3 ("s390/disassembler: increase show_code buffer size") Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 465d52b5e470..0a0e14335a04 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -548,7 +548,7 @@ void show_code(struct pt_regs *regs) start += opsize; pr_cont("%s", buffer); ptr = buffer; - ptr += sprintf(ptr, "\n\t "); + ptr += sprintf(ptr, "\n "); hops++; } pr_cont("\n"); -- cgit v1.2.3 From 53c4ab70c11c3ba1b9e3caa8e8c17e9c16d9cbc0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 22 Nov 2017 17:19:32 +0100 Subject: s390: fix alloc_pgste check in init_new_context again git commit badb8bb983e9 "fix alloc_pgste check in init_new_context" fixed the problem of 'current->mm == NULL' in init_new_context back in 2011. git commit 3eabaee998c7 "KVM: s390: allow sie enablement for multi- threaded programs" completely removed the check against alloc_pgste. git commit 23fefe119ceb "s390/kvm: avoid global config of vm.alloc_pgste=1" re-added a check against the alloc_pgste flag but without the required check for current->mm != NULL. For execve() called by a kernel thread init_new_context() reads from ((struct mm_struct *) NULL)->context.alloc_pgste to decide between 2K vs 4K page tables. If the bit happens to be set for the init process it will be created with large page tables. This decision is inherited by all the children of init, this waste quite some memory. Re-add the check for 'current->mm != NULL'. Fixes: 23fefe119ceb ("s390/kvm: avoid global config of vm.alloc_pgste=1") Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index f4a07f788f78..65154eaa3714 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -28,7 +28,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || - current->mm->context.alloc_pgste; + (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.use_skey = 0; mm->context.use_cmma = 0; -- cgit v1.2.3 From 03a6c82218b9a87014b2c6c4e178294fdc8ebd8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: rxrpc: The mutex lock returned by rxrpc_accept_call() needs releasing The caller of rxrpc_accept_call() must release the lock on call->user_mutex returned by that function. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7d2595582c09..3a99b1a908df 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -619,8 +619,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); - rxrpc_put_call(call, rxrpc_call_put); - return 0; + ret = 0; + goto out_put_unlock; } call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); @@ -689,6 +689,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len, NULL); } +out_put_unlock: mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put); -- cgit v1.2.3 From 48ca24636d5a26f1b7b8e69c54bccf19394843e5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: rxrpc: Don't set upgrade by default in sendmsg() Don't set upgrade by default when creating a call from sendmsg(). This is a holdover from when I was testing the code. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3a99b1a908df..94555c94b2d8 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -602,7 +602,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, - .upgrade = true, + .upgrade = false, }; _enter(""); -- cgit v1.2.3 From 9faaff593404a9c4e5abc6839a641635d7b9d0cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: rxrpc: Provide a different lockdep key for call->user_mutex for kernel calls Provide a different lockdep key for rxrpc_call::user_mutex when the call is made on a kernel socket, such as by the AFS filesystem. The problem is that lockdep registers a false positive between userspace calling the sendmsg syscall on a user socket where call->user_mutex is held whilst userspace memory is accessed whereas the AFS filesystem may perform operations with mmap_sem held by the caller. In such a case, the following warning is produced. ====================================================== WARNING: possible circular locking dependency detected 4.14.0-fscache+ #243 Tainted: G E ------------------------------------------------------ modpost/16701 is trying to acquire lock: (&vnode->io_lock){+.+.}, at: [] afs_begin_vnode_operation+0x33/0x77 [kafs] but task is already holding lock: (&mm->mmap_sem){++++}, at: [] __do_page_fault+0x1ef/0x486 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++}: __might_fault+0x61/0x89 _copy_from_iter_full+0x40/0x1fa rxrpc_send_data+0x8dc/0xff3 rxrpc_do_sendmsg+0x62f/0x6a1 rxrpc_sendmsg+0x166/0x1b7 sock_sendmsg+0x2d/0x39 ___sys_sendmsg+0x1ad/0x22b __sys_sendmsg+0x41/0x62 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #2 (&call->user_mutex){+.+.}: __mutex_lock+0x86/0x7d2 rxrpc_new_client_call+0x378/0x80e rxrpc_kernel_begin_call+0xf3/0x154 afs_make_call+0x195/0x454 [kafs] afs_vl_get_capabilities+0x193/0x198 [kafs] afs_vl_lookup_vldb+0x5f/0x151 [kafs] afs_create_volume+0x2e/0x2f4 [kafs] afs_mount+0x56a/0x8d7 [kafs] mount_fs+0x6a/0x109 vfs_kern_mount+0x67/0x135 do_mount+0x90b/0xb57 SyS_mount+0x72/0x98 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #1 (k-sk_lock-AF_RXRPC){+.+.}: lock_sock_nested+0x74/0x8a rxrpc_kernel_begin_call+0x8a/0x154 afs_make_call+0x195/0x454 [kafs] afs_fs_get_capabilities+0x17a/0x17f [kafs] afs_probe_fileserver+0xf7/0x2f0 [kafs] afs_select_fileserver+0x83f/0x903 [kafs] afs_fetch_status+0x89/0x11d [kafs] afs_iget+0x16f/0x4f8 [kafs] afs_mount+0x6c6/0x8d7 [kafs] mount_fs+0x6a/0x109 vfs_kern_mount+0x67/0x135 do_mount+0x90b/0xb57 SyS_mount+0x72/0x98 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #0 (&vnode->io_lock){+.+.}: lock_acquire+0x174/0x19f __mutex_lock+0x86/0x7d2 afs_begin_vnode_operation+0x33/0x77 [kafs] afs_fetch_data+0x80/0x12a [kafs] afs_readpages+0x314/0x405 [kafs] __do_page_cache_readahead+0x203/0x2ba filemap_fault+0x179/0x54d __do_fault+0x17/0x60 __handle_mm_fault+0x6d7/0x95c handle_mm_fault+0x24e/0x2a3 __do_page_fault+0x301/0x486 do_page_fault+0x236/0x259 page_fault+0x22/0x30 __clear_user+0x3d/0x60 padzero+0x1c/0x2b load_elf_binary+0x785/0xdc7 search_binary_handler+0x81/0x1ff do_execveat_common.isra.14+0x600/0x888 do_execve+0x1f/0x21 SyS_execve+0x28/0x2f do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 other info that might help us debug this: Chain exists of: &vnode->io_lock --> &call->user_mutex --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&call->user_mutex); lock(&mm->mmap_sem); lock(&vnode->io_lock); *** DEADLOCK *** 1 lock held by modpost/16701: #0: (&mm->mmap_sem){++++}, at: [] __do_page_fault+0x1ef/0x486 stack backtrace: CPU: 0 PID: 16701 Comm: modpost Tainted: G E 4.14.0-fscache+ #243 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: dump_stack+0x67/0x8e print_circular_bug+0x341/0x34f check_prev_add+0x11f/0x5d4 ? add_lock_to_list.isra.12+0x8b/0x8b ? add_lock_to_list.isra.12+0x8b/0x8b ? __lock_acquire+0xf77/0x10b4 __lock_acquire+0xf77/0x10b4 lock_acquire+0x174/0x19f ? afs_begin_vnode_operation+0x33/0x77 [kafs] __mutex_lock+0x86/0x7d2 ? afs_begin_vnode_operation+0x33/0x77 [kafs] ? afs_begin_vnode_operation+0x33/0x77 [kafs] ? afs_begin_vnode_operation+0x33/0x77 [kafs] afs_begin_vnode_operation+0x33/0x77 [kafs] afs_fetch_data+0x80/0x12a [kafs] afs_readpages+0x314/0x405 [kafs] __do_page_cache_readahead+0x203/0x2ba ? filemap_fault+0x179/0x54d filemap_fault+0x179/0x54d __do_fault+0x17/0x60 __handle_mm_fault+0x6d7/0x95c handle_mm_fault+0x24e/0x2a3 __do_page_fault+0x301/0x486 do_page_fault+0x236/0x259 page_fault+0x22/0x30 RIP: 0010:__clear_user+0x3d/0x60 RSP: 0018:ffff880071e93da0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 000000000000011c RCX: 000000000000011c RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000000000060f720 RBP: 000000000060f720 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: ffff8800b5459b68 R12: ffff8800ce150e00 R13: 000000000060f720 R14: 00000000006127a8 R15: 0000000000000000 padzero+0x1c/0x2b load_elf_binary+0x785/0xdc7 search_binary_handler+0x81/0x1ff do_execveat_common.isra.14+0x600/0x888 do_execve+0x1f/0x21 SyS_execve+0x28/0x2f do_syscall_64+0x89/0x1be entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fdb6009ee07 RSP: 002b:00007fff566d9728 EFLAGS: 00000246 ORIG_RAX: 000000000000003b RAX: ffffffffffffffda RBX: 000055ba57280900 RCX: 00007fdb6009ee07 RDX: 000055ba5727f270 RSI: 000055ba5727cac0 RDI: 000055ba57280900 RBP: 000055ba57280900 R08: 00007fff566d9700 R09: 0000000000000000 R10: 000055ba5727cac0 R11: 0000000000000246 R12: 0000000000000000 R13: 000055ba5727cac0 R14: 000055ba5727f270 R15: 0000000000000000 Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_object.c | 19 +++++++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b2151993d384..a972887b3f5d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -672,7 +672,7 @@ extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); -struct rxrpc_call *rxrpc_alloc_call(gfp_t); +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cbd1701e813a..3028298ca561 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, /* Now it gets complicated, because calls get registered with the * socket here, particularly if a user ID is preassigned by the user. */ - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4c7fbc6dcce7..1f141dc08ad2 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -55,6 +55,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); } +static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; + /* * find an extant server call * - called in process context with IRQs enabled @@ -95,7 +97,7 @@ found_extant_call: /* * allocate a new call */ -struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) { struct rxrpc_call *call; @@ -114,6 +116,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) goto nomem_2; mutex_init(&call->user_mutex); + + /* Prevent lockdep reporting a deadlock false positive between the afs + * filesystem and sys_sendmsg() via the mmap sem. + */ + if (rx->sk.sk_kern_sock) + lockdep_set_class(&call->user_mutex, + &rxrpc_call_user_mutex_lock_class_key); + setup_timer(&call->timer, rxrpc_call_timer_expired, (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); @@ -151,7 +161,8 @@ nomem: /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, + struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; @@ -159,7 +170,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, _enter(""); - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; @@ -210,7 +221,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(srx, gfp); + call = rxrpc_alloc_client_call(rx, srx, gfp); if (IS_ERR(call)) { release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); -- cgit v1.2.3 From 3136ef49a14ccc148becf813074e08fc92fc9b23 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: rxrpc: Delay terminal ACK transmission on a client call Delay terminal ACK transmission on a client call by deferring it to the connection processor. This allows it to be skipped if we can send the next call instead, the first DATA packet of which will implicitly ack this call. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 17 ++++++++++++ net/rxrpc/conn_client.c | 18 ++++++++++++ net/rxrpc/conn_event.c | 74 ++++++++++++++++++++++++++++++++++++++++--------- net/rxrpc/conn_object.c | 10 +++++++ net/rxrpc/recvmsg.c | 2 ++ 5 files changed, 108 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a972887b3f5d..d1213d503f30 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -338,8 +338,17 @@ enum rxrpc_conn_flag { RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ + RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */ + RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */ + RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */ + RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */ }; +#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \ + (1UL << RXRPC_CONN_FINAL_ACK_1) | \ + (1UL << RXRPC_CONN_FINAL_ACK_2) | \ + (1UL << RXRPC_CONN_FINAL_ACK_3)) + /* * Events that can be raised upon a connection. */ @@ -393,6 +402,7 @@ struct rxrpc_connection { #define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) struct list_head waiting_calls; /* Calls waiting for channels */ struct rxrpc_channel { + unsigned long final_ack_at; /* Time at which to issue final ACK */ struct rxrpc_call __rcu *call; /* Active call */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ @@ -404,6 +414,7 @@ struct rxrpc_connection { }; } channels[RXRPC_MAXCALLS]; + struct timer_list timer; /* Conn event timer */ struct work_struct processor; /* connection event processor */ union { struct rb_node client_node; /* Node in local->client_conns */ @@ -861,6 +872,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) rxrpc_put_service_conn(conn); } +static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, + unsigned long expire_at) +{ + timer_reduce(&conn->timer, expire_at); +} + /* * conn_service.c */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5f9624bd311c..cfb997593da9 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + /* Cancel the final ACK on the previous call if it hasn't been sent yet + * as the DATA packet will implicitly ACK it. + */ + clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + write_lock_bh(&call->state_lock); if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; @@ -813,6 +818,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out_2; } + /* Schedule the final ACK to be transmitted in a short while so that it + * can be skipped if we find a follow-on call. The first DATA packet + * of the follow on call will implicitly ACK this call. + */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + unsigned long final_ack_at = jiffies + 2; + + WRITE_ONCE(chan->final_ack_at, final_ack_at); + smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */ + set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + rxrpc_reduce_conn_timer(conn, final_ack_at); + } + /* Things are more complex and we need the cache lock. We might be * able to simply idle the conn or it might now be lurking on the wait * list. It might even get moved back to the active list whilst we're diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 59a51a56e7c8..9e9a8db1bc9c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -24,9 +24,10 @@ * Retransmit terminal ACK or ABORT of the previous call. */ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int channel) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; struct msghdr msg; struct kvec iov; @@ -48,7 +49,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _enter("%d", conn->debug_id); - chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just * ignore this packet. @@ -56,7 +57,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, call_id = READ_ONCE(chan->last_call); /* Sync with __rxrpc_disconnect_call() */ smp_rmb(); - if (call_id != sp->hdr.callNumber) + if (skb && call_id != sp->hdr.callNumber) return; msg.msg_name = &conn->params.peer->srx.transport; @@ -65,9 +66,9 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, msg.msg_controllen = 0; msg.msg_flags = 0; - pkt.whdr.epoch = htonl(sp->hdr.epoch); - pkt.whdr.cid = htonl(sp->hdr.cid); - pkt.whdr.callNumber = htonl(sp->hdr.callNumber); + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(conn->proto.cid); + pkt.whdr.callNumber = htonl(call_id); pkt.whdr.seq = 0; pkt.whdr.type = chan->last_type; pkt.whdr.flags = conn->out_clientflag; @@ -87,11 +88,11 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, mtu = conn->params.peer->if_mtu; mtu -= conn->params.peer->hdrsize; pkt.ack.bufferSpace = 0; - pkt.ack.maxSkew = htons(skb->priority); - pkt.ack.firstPacket = htonl(chan->last_seq); - pkt.ack.previousPacket = htonl(chan->last_seq - 1); - pkt.ack.serial = htonl(sp->hdr.serial); - pkt.ack.reason = RXRPC_ACK_DUPLICATE; + pkt.ack.maxSkew = htons(skb ? skb->priority : 0); + pkt.ack.firstPacket = htonl(chan->last_seq + 1); + pkt.ack.previousPacket = htonl(chan->last_seq); + pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); + pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; pkt.info.rxMTU = htonl(rxrpc_rx_mtu); pkt.info.maxMTU = htonl(mtu); @@ -272,7 +273,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb); + rxrpc_conn_retransmit_call(conn, skb, + sp->hdr.cid & RXRPC_CHANNELMASK); return 0; case RXRPC_PACKET_TYPE_BUSY: @@ -378,6 +380,48 @@ abort: _leave(" [aborted]"); } +/* + * Process delayed final ACKs that we haven't subsumed into a subsequent call. + */ +static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn) +{ + unsigned long j = jiffies, next_j; + unsigned int channel; + bool set; + +again: + next_j = j + LONG_MAX; + set = false; + for (channel = 0; channel < RXRPC_MAXCALLS; channel++) { + struct rxrpc_channel *chan = &conn->channels[channel]; + unsigned long ack_at; + + if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) + continue; + + smp_rmb(); /* vs rxrpc_disconnect_client_call */ + ack_at = READ_ONCE(chan->final_ack_at); + + if (time_before(j, ack_at)) { + if (time_before(ack_at, next_j)) { + next_j = ack_at; + set = true; + } + continue; + } + + if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, + &conn->flags)) + rxrpc_conn_retransmit_call(conn, NULL, channel); + } + + j = jiffies; + if (time_before_eq(next_j, j)) + goto again; + if (set) + rxrpc_reduce_conn_timer(conn, next_j); +} + /* * connection-level event processor */ @@ -394,6 +438,10 @@ void rxrpc_process_connection(struct work_struct *work) if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); + /* Process delayed ACKs whose time has come. */ + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn); + /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index fe575798592f..a01a3791f06a 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -24,6 +24,14 @@ unsigned int rxrpc_connection_expiry = 10 * 60; static void rxrpc_destroy_connection(struct rcu_head *); +static void rxrpc_connection_timer(struct timer_list *timer) +{ + struct rxrpc_connection *conn = + container_of(timer, struct rxrpc_connection, timer); + + rxrpc_queue_conn(conn); +} + /* * allocate a new connection */ @@ -38,6 +46,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) INIT_LIST_HEAD(&conn->cache_link); spin_lock_init(&conn->channel_lock); INIT_LIST_HEAD(&conn->waiting_calls); + timer_setup(&conn->timer, &rxrpc_connection_timer, 0); INIT_WORK(&conn->processor, &rxrpc_process_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); @@ -332,6 +341,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) _net("DESTROY CONN %d", conn->debug_id); + del_timer_sync(&conn->timer); rxrpc_purge_queue(&conn->rx_queue); conn->security->clear(conn); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8510a98b87e1..be0b9ae13893 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); +#if 0 // TODO: May want to transmit final ACK under some circumstances anyway if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); rxrpc_send_ack_packet(call, false); } +#endif write_lock_bh(&call->state_lock); -- cgit v1.2.3 From 4812417894770f8c13e5dd8a66479ae44f4b01ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: rxrpc: Split the call params from the operation params When rxrpc_sendmsg() parses the control message buffer, it places the parameters extracted into a structure, but lumps together call parameters (such as user call ID) with operation parameters (such as whether to send data, send an abort or accept a call). Split the call parameters out into their own structure, a copy of which is then embedded in the operation parameters struct. The call parameters struct is then passed down into the places that need it instead of passing the individual parameters. This allows for extra call parameters to be added. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 8 ++++++-- net/rxrpc/ar-internal.h | 31 +++++++++++++++++++++++++++++- net/rxrpc/call_object.c | 15 +++++++-------- net/rxrpc/sendmsg.c | 51 +++++++++++++++++-------------------------------- 4 files changed, 60 insertions(+), 45 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9b5c46b052fd..c0cdcf980ffc 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, bool upgrade) { struct rxrpc_conn_parameters cp; + struct rxrpc_call_params p; struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); int ret; @@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, if (key && !key->payload.data[0]) key = NULL; /* a no-security key */ + memset(&p, 0, sizeof(p)); + p.user_call_ID = user_call_ID; + p.tx_total_len = tx_total_len; + memset(&cp, 0, sizeof(cp)); cp.local = rx->local; cp.key = key; @@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.upgrade = upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, - gfp); + call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp); /* The socket has been unlocked. */ if (!IS_ERR(call)) { call->notify_rx = notify_rx; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d1213d503f30..ba63f2231107 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -643,6 +643,35 @@ struct rxrpc_ack_summary { u8 cumulative_acks; }; +/* + * sendmsg() cmsg-specified parameters. + */ +enum rxrpc_command { + RXRPC_CMD_SEND_DATA, /* send data message */ + RXRPC_CMD_SEND_ABORT, /* request abort generation */ + RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ + RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ +}; + +struct rxrpc_call_params { + s64 tx_total_len; /* Total Tx data length (if send data) */ + unsigned long user_call_ID; /* User's call ID */ + struct { + u32 hard; /* Maximum lifetime (sec) */ + u32 idle; /* Max time since last data packet (msec) */ + u32 normal; /* Max time since last call packet (msec) */ + } timeouts; + u8 nr_timeouts; /* Number of timeouts specified */ +}; + +struct rxrpc_send_params { + struct rxrpc_call_params call; + u32 abort_code; /* Abort code to Tx (if abort) */ + enum rxrpc_command command : 8; /* The command to implement */ + bool exclusive; /* Shared or exclusive call */ + bool upgrade; /* If the connection is upgradeable */ +}; + #include /* @@ -687,7 +716,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - unsigned long, s64, gfp_t); + struct rxrpc_call_params *, gfp_t); int rxrpc_retry_client_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 1f141dc08ad2..c3e1fa854471 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -208,8 +208,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, - unsigned long user_call_ID, - s64 tx_total_len, + struct rxrpc_call_params *p, gfp_t gfp) __releases(&rx->sk.sk_lock.slock) { @@ -219,7 +218,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, const void *here = __builtin_return_address(0); int ret; - _enter("%p,%lx", rx, user_call_ID); + _enter("%p,%lx", rx, p->user_call_ID); call = rxrpc_alloc_client_call(rx, srx, gfp); if (IS_ERR(call)) { @@ -228,9 +227,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - call->tx_total_len = tx_total_len; + call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), - here, (const void *)user_call_ID); + here, (const void *)p->user_call_ID); /* We need to protect a partially set up call against the user as we * will be acting outside the socket lock. @@ -246,16 +245,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < xcall->user_call_ID) + if (p->user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; - else if (user_call_ID > xcall->user_call_ID) + else if (p->user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto error_dup_user_ID; } rcu_assign_pointer(call->socket, rx); - call->user_call_ID = user_call_ID; + call->user_call_ID = p->user_call_ID; __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 94555c94b2d8..de5ab327c18a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -21,22 +21,6 @@ #include #include "ar-internal.h" -enum rxrpc_command { - RXRPC_CMD_SEND_DATA, /* send data message */ - RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ - RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ -}; - -struct rxrpc_send_params { - s64 tx_total_len; /* Total Tx data length (if send data) */ - unsigned long user_call_ID; /* User's call ID */ - u32 abort_code; /* Abort code to Tx (if abort) */ - enum rxrpc_command command : 8; /* The command to implement */ - bool exclusive; /* Shared or exclusive call */ - bool upgrade; /* If the connection is upgradeable */ -}; - /* * Wait for space to appear in the Tx queue or a signal to occur. */ @@ -480,11 +464,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) if (msg->msg_flags & MSG_CMSG_COMPAT) { if (len != sizeof(u32)) return -EINVAL; - p->user_call_ID = *(u32 *)CMSG_DATA(cmsg); + p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg); } else { if (len != sizeof(unsigned long)) return -EINVAL; - p->user_call_ID = *(unsigned long *) + p->call.user_call_ID = *(unsigned long *) CMSG_DATA(cmsg); } got_user_ID = true; @@ -522,10 +506,10 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) break; case RXRPC_TX_LENGTH: - if (p->tx_total_len != -1 || len != sizeof(__s64)) + if (p->call.tx_total_len != -1 || len != sizeof(__s64)) return -EINVAL; - p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg); - if (p->tx_total_len < 0) + p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg); + if (p->call.tx_total_len < 0) return -EINVAL; break; @@ -536,7 +520,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) if (!got_user_ID) return -EINVAL; - if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) + if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; _leave(" = 0"); return 0; @@ -576,8 +560,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, - p->tx_total_len, GFP_KERNEL); + call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL); /* The socket is now unlocked */ _leave(" = %p\n", call); @@ -597,12 +580,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) int ret; struct rxrpc_send_params p = { - .tx_total_len = -1, - .user_call_ID = 0, - .abort_code = 0, - .command = RXRPC_CMD_SEND_DATA, - .exclusive = false, - .upgrade = false, + .call.tx_total_len = -1, + .call.user_call_ID = 0, + .abort_code = 0, + .command = RXRPC_CMD_SEND_DATA, + .exclusive = false, + .upgrade = false, }; _enter(""); @@ -615,7 +598,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, p.user_call_ID, NULL); + call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL); /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); @@ -623,7 +606,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) goto out_put_unlock; } - call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); + call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); if (!call) { ret = -EBADSLT; if (p.command != RXRPC_CMD_SEND_DATA) @@ -653,13 +636,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) goto error_put; } - if (p.tx_total_len != -1) { + if (p.call.tx_total_len != -1) { ret = -EINVAL; if (call->tx_total_len != -1 || call->tx_pending || call->tx_top != 0) goto error_put; - call->tx_total_len = p.tx_total_len; + call->tx_total_len = p.call.tx_total_len; } } -- cgit v1.2.3 From a158bdd3247b9656df36ba133235fff702e9fdc3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: rxrpc: Fix call timeouts Fix the rxrpc call expiration timeouts and make them settable from userspace. By analogy with other rx implementations, there should be three timeouts: (1) "Normal timeout" This is set for all calls and is triggered if we haven't received any packets from the peer in a while. It is measured from the last time we received any packet on that call. This is not reset by any connection packets (such as CHALLENGE/RESPONSE packets). If a service operation takes a long time, the server should generate PING ACKs at a duration that's substantially less than the normal timeout so is to keep both sides alive. This is set at 1/6 of normal timeout. (2) "Idle timeout" This is set only for a service call and is triggered if we stop receiving the DATA packets that comprise the request data. It is measured from the last time we received a DATA packet. (3) "Hard timeout" This can be set for a call and specified the maximum lifetime of that call. It should not be specified by default. Some operations (such as volume transfer) take a long time. Allow userspace to set/change the timeouts on a call with sendmsg, using a control message: RXRPC_SET_CALL_TIMEOUTS The data to the message is a number of 32-bit words, not all of which need be given: u32 hard_timeout; /* sec from first packet */ u32 idle_timeout; /* msec from packet Rx */ u32 normal_timeout; /* msec from data Rx */ This can be set in combination with any other sendmsg() that affects a call. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 69 +++++++++++------ include/uapi/linux/rxrpc.h | 1 + net/rxrpc/ar-internal.h | 37 +++++---- net/rxrpc/call_event.c | 179 ++++++++++++++++++++----------------------- net/rxrpc/call_object.c | 27 ++++--- net/rxrpc/conn_client.c | 4 +- net/rxrpc/input.c | 34 +++++++- net/rxrpc/misc.c | 19 ++--- net/rxrpc/recvmsg.c | 2 +- net/rxrpc/sendmsg.c | 59 +++++++++++--- net/rxrpc/sysctl.c | 60 +++++++-------- 11 files changed, 290 insertions(+), 201 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ebe96796027a..01dcbc2164b5 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -138,10 +138,20 @@ enum rxrpc_rtt_rx_trace { enum rxrpc_timer_trace { rxrpc_timer_begin, + rxrpc_timer_exp_ack, + rxrpc_timer_exp_hard, + rxrpc_timer_exp_idle, + rxrpc_timer_exp_normal, + rxrpc_timer_exp_ping, + rxrpc_timer_exp_resend, rxrpc_timer_expired, rxrpc_timer_init_for_reply, rxrpc_timer_init_for_send_reply, + rxrpc_timer_restart, rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_hard, + rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, rxrpc_timer_set_for_send, @@ -296,12 +306,22 @@ enum rxrpc_congest_change { #define rxrpc_timer_traces \ EM(rxrpc_timer_begin, "Begin ") \ EM(rxrpc_timer_expired, "*EXPR*") \ + EM(rxrpc_timer_exp_ack, "ExpAck") \ + EM(rxrpc_timer_exp_hard, "ExpHrd") \ + EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_normal, "ExpNml") \ + EM(rxrpc_timer_exp_ping, "ExpPng") \ + EM(rxrpc_timer_exp_resend, "ExpRsn") \ EM(rxrpc_timer_init_for_reply, "IniRpl") \ EM(rxrpc_timer_init_for_send_reply, "SndRpl") \ + EM(rxrpc_timer_restart, "Restrt") \ EM(rxrpc_timer_set_for_ack, "SetAck") \ + EM(rxrpc_timer_set_for_hard, "SetHrd") \ + EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ EM(rxrpc_timer_set_for_resend, "SetRTx") \ - E_(rxrpc_timer_set_for_send, "SetTx ") + E_(rxrpc_timer_set_for_send, "SetSnd") #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ @@ -932,39 +952,44 @@ TRACE_EVENT(rxrpc_rtt_rx, TRACE_EVENT(rxrpc_timer, TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now, unsigned long now_j), + unsigned long now), - TP_ARGS(call, why, now, now_j), + TP_ARGS(call, why, now), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(enum rxrpc_timer_trace, why ) - __field_struct(ktime_t, now ) - __field_struct(ktime_t, expire_at ) - __field_struct(ktime_t, ack_at ) - __field_struct(ktime_t, resend_at ) - __field(unsigned long, now_j ) - __field(unsigned long, timer ) + __field(long, now ) + __field(long, ack_at ) + __field(long, resend_at ) + __field(long, ping_at ) + __field(long, expect_rx_by ) + __field(long, expect_req_by ) + __field(long, expect_term_by ) + __field(long, timer ) ), TP_fast_assign( - __entry->call = call; - __entry->why = why; - __entry->now = now; - __entry->expire_at = call->expire_at; - __entry->ack_at = call->ack_at; - __entry->resend_at = call->resend_at; - __entry->now_j = now_j; - __entry->timer = call->timer.expires; + __entry->call = call; + __entry->why = why; + __entry->now = now; + __entry->ack_at = call->ack_at; + __entry->resend_at = call->resend_at; + __entry->expect_rx_by = call->expect_rx_by; + __entry->expect_req_by = call->expect_req_by; + __entry->expect_term_by = call->expect_term_by; + __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s x=%lld a=%lld r=%lld t=%ld", + TP_printk("c=%p %s a=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), - ktime_to_ns(ktime_sub(__entry->expire_at, __entry->now)), - ktime_to_ns(ktime_sub(__entry->ack_at, __entry->now)), - ktime_to_ns(ktime_sub(__entry->resend_at, __entry->now)), - __entry->timer - __entry->now_j) + __entry->ack_at - __entry->now, + __entry->resend_at - __entry->now, + __entry->expect_rx_by - __entry->now, + __entry->expect_req_by - __entry->now, + __entry->expect_term_by - __entry->now, + __entry->timer - __entry->now) ); TRACE_EVENT(rxrpc_rx_lose, diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 9d4afea308a4..9335d92c14a4 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -59,6 +59,7 @@ enum rxrpc_cmsg_type { RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ + RXRPC_SET_CALL_TIMEOUT = 13, /* s-: Set one or more call timeouts */ RXRPC__SUPPORTED }; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ba63f2231107..548411371048 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -468,9 +468,9 @@ enum rxrpc_call_flag { enum rxrpc_call_event { RXRPC_CALL_EV_ACK, /* need to generate ACK */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ RXRPC_CALL_EV_PING, /* Ping send required */ + RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ }; /* @@ -514,10 +514,14 @@ struct rxrpc_call { struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ struct mutex user_mutex; /* User access mutex */ - ktime_t ack_at; /* When deferred ACK needs to happen */ - ktime_t resend_at; /* When next resend needs to happen */ - ktime_t ping_at; /* When next to send a ping */ - ktime_t expire_at; /* When the call times out */ + unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long resend_at; /* When next resend needs to happen */ + unsigned long ping_at; /* When next to send a ping */ + unsigned long expect_rx_by; /* When we expect to get a packet by */ + unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ + unsigned long expect_term_by; /* When we expect call termination by */ + u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ + u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@ -697,12 +701,19 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); -void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); +static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why) +{ + trace_rxrpc_timer(call, why, now); + timer_reduce(&call->timer, expire_at); +} + /* * call_object.c */ @@ -843,8 +854,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, */ extern unsigned int rxrpc_max_client_connections; extern unsigned int rxrpc_reap_client_connections; -extern unsigned int rxrpc_conn_idle_client_expiry; -extern unsigned int rxrpc_conn_idle_client_fast_expiry; +extern unsigned long rxrpc_conn_idle_client_expiry; +extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); @@ -976,13 +987,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local) * misc.c */ extern unsigned int rxrpc_max_backlog __read_mostly; -extern unsigned int rxrpc_requested_ack_delay; -extern unsigned int rxrpc_soft_ack_delay; -extern unsigned int rxrpc_idle_ack_delay; +extern unsigned long rxrpc_requested_ack_delay; +extern unsigned long rxrpc_soft_ack_delay; +extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern unsigned int rxrpc_resend_timeout; +extern unsigned long rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3574508baf9a..c14395d5ad8c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -21,80 +21,6 @@ #include #include "ar-internal.h" -/* - * Set the timer - */ -void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) -{ - unsigned long t_j, now_j = jiffies; - ktime_t t; - bool queue = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - t = call->expire_at; - if (!ktime_after(t, now)) { - trace_rxrpc_timer(call, why, now, now_j); - queue = true; - goto out; - } - - if (!ktime_after(call->resend_at, now)) { - call->resend_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - queue = true; - } else if (ktime_before(call->resend_at, t)) { - t = call->resend_at; - } - - if (!ktime_after(call->ack_at, now)) { - call->ack_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - queue = true; - } else if (ktime_before(call->ack_at, t)) { - t = call->ack_at; - } - - if (!ktime_after(call->ping_at, now)) { - call->ping_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) - queue = true; - } else if (ktime_before(call->ping_at, t)) { - t = call->ping_at; - } - - t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); - t_j += jiffies; - - /* We have to make sure that the calculated jiffies value falls - * at or after the nsec value, or we may loop ceaselessly - * because the timer times out, but we haven't reached the nsec - * timeout yet. - */ - t_j++; - - if (call->timer.expires != t_j || !timer_pending(&call->timer)) { - mod_timer(&call->timer, t_j); - trace_rxrpc_timer(call, why, now, now_j); - } - } - -out: - if (queue) - rxrpc_queue_call(call); -} - -/* - * Set the timer - */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) -{ - read_lock_bh(&call->state_lock); - __rxrpc_set_timer(call, why, now); - read_unlock_bh(&call->state_lock); -} - /* * Propose a PING ACK be sent. */ @@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call, !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) rxrpc_queue_call(call); } else { - ktime_t now = ktime_get_real(); - ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); + unsigned long now = jiffies; + unsigned long ping_at = now + rxrpc_idle_ack_delay; - if (ktime_before(ping_at, call->ping_at)) { - call->ping_at = ping_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); + if (time_before(ping_at, call->ping_at)) { + WRITE_ONCE(call->ping_at, ping_at); + rxrpc_reduce_call_timer(call, ping_at, now, + rxrpc_timer_set_for_ping); } } } @@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned int expiry = rxrpc_soft_ack_delay; - ktime_t now, ack_at; + unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; /* Pings are handled specially because we don't want to accidentally @@ -190,11 +116,12 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - now = ktime_get_real(); - ack_at = ktime_add_ms(now, expiry); - if (ktime_before(ack_at, call->ack_at)) { - call->ack_at = ack_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); + now = jiffies; + ack_at = jiffies + expiry; + if (time_before(ack_at, call->ack_at)) { + WRITE_ONCE(call->ack_at, ack_at); + rxrpc_reduce_call_timer(call, ack_at, now, + rxrpc_timer_set_for_ack); } } @@ -227,18 +154,20 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. */ -static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) +static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; - ktime_t max_age, oldest, ack_ts; + ktime_t now, max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - max_age = ktime_sub_ms(now, rxrpc_resend_timeout); + now = ktime_get_real(); + max_age = ktime_sub_ms(now, rxrpc_resend_timeout * 1000 / HZ); spin_lock_bh(&call->lock); @@ -282,7 +211,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); + resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now))); + resend_at += jiffies + rxrpc_resend_timeout; + WRITE_ONCE(call->resend_at, resend_at); if (unacked) rxrpc_congestion_timeout(call); @@ -292,7 +223,8 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) * retransmitting data. */ if (!retrans) { - rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_resend); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) @@ -364,7 +296,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - ktime_t now; + unsigned long now, next, t; rxrpc_see_call(call); @@ -384,8 +316,50 @@ recheck_state: goto out_put; } - now = ktime_get_real(); - if (ktime_before(call->expire_at, now)) { + /* Work out if any timeouts tripped */ + now = jiffies; + t = READ_ONCE(call->expect_rx_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_req_by); + if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && + time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_term_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->ack_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); + cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK, &call->events); + } + + t = READ_ONCE(call->ping_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); + cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + + t = READ_ONCE(call->resend_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); + cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_RESEND, &call->events); + } + + /* Process events */ + if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; @@ -408,7 +382,22 @@ recheck_state: goto recheck_state; } - rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); + /* Make sure the timer is restarted */ + next = call->expect_rx_by; + +#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } + + set(call->expect_req_by); + set(call->expect_term_by); + set(call->ack_at); + set(call->resend_at); + set(call->ping_at); + + now = jiffies; + if (time_after_eq(now, next)) + goto recheck_state; + + rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c3e1fa854471..b305970a9b63 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -51,8 +51,10 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); + if (call->state < RXRPC_CALL_COMPLETE) { + trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); + rxrpc_queue_call(call); + } } static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; @@ -139,6 +141,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); call->tx_total_len = -1; + call->next_rx_timo = 20 * HZ; + call->next_req_timo = 1 * HZ; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); @@ -189,15 +193,16 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, */ static void rxrpc_start_call_timer(struct rxrpc_call *call) { - ktime_t now = ktime_get_real(), expire_at; - - expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); - call->expire_at = expire_at; - call->ack_at = expire_at; - call->ping_at = expire_at; - call->resend_at = expire_at; - call->timer.expires = jiffies + LONG_MAX / 2; - rxrpc_set_timer(call, rxrpc_timer_begin, now); + unsigned long now = jiffies; + unsigned long j = now + MAX_JIFFY_OFFSET; + + call->ack_at = j; + call->resend_at = j; + call->ping_at = j; + call->expect_rx_by = j; + call->expect_req_by = j; + call->expect_term_by = j; + call->timer.expires = now; } /* diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index cfb997593da9..97f6a8de4845 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -85,8 +85,8 @@ __read_mostly unsigned int rxrpc_max_client_connections = 1000; __read_mostly unsigned int rxrpc_reap_client_connections = 900; -__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; -__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; +__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; /* * We use machine-unique IDs for our client connections. diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1b592073ec96..c89647eae86d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -318,16 +318,18 @@ bad_state: static bool rxrpc_receiving_reply(struct rxrpc_call *call) { struct rxrpc_ack_summary summary = { 0 }; + unsigned long now, timo; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { spin_lock_bh(&call->lock); call->ackr_reason = 0; - call->resend_at = call->expire_at; - call->ack_at = call->expire_at; spin_unlock_bh(&call->lock); - rxrpc_set_timer(call, rxrpc_timer_init_for_reply, - ktime_get_real()); + now = jiffies; + timo = now + MAX_JIFFY_OFFSET; + WRITE_ONCE(call->resend_at, timo); + WRITE_ONCE(call->ack_at, timo); + trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) @@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, if (state >= RXRPC_CALL_COMPLETE) return; + if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + unsigned long timo = READ_ONCE(call->next_req_timo); + unsigned long now, expect_req_by; + + if (timo) { + now = jiffies; + expect_req_by = now + timo; + WRITE_ONCE(call->expect_req_by, expect_req_by); + rxrpc_reduce_call_timer(call, expect_req_by, now, + rxrpc_timer_set_for_idle); + } + } + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ @@ -908,9 +923,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long timo; _enter("%p,%p", call, skb); + timo = READ_ONCE(call->next_rx_timo); + if (timo) { + unsigned long now = jiffies, expect_rx_by; + + expect_rx_by = jiffies + timo; + WRITE_ONCE(call->expect_rx_by, expect_rx_by); + rxrpc_reduce_call_timer(call, expect_rx_by, now, + rxrpc_timer_set_for_normal); + } + switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb, skew); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1a2d4b112064..c1d9e7fd7448 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -20,34 +20,29 @@ */ unsigned int rxrpc_max_backlog __read_mostly = 10; -/* - * Maximum lifetime of a call (in mx). - */ -unsigned int rxrpc_max_call_lifetime = 60 * 1000; - /* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in ms). + * packet with RXRPC_REQUEST_ACK set (in jiffies). */ -unsigned int rxrpc_requested_ack_delay = 1; +unsigned long rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in ms). + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * 1000; +unsigned long rxrpc_soft_ack_delay = HZ; /* - * How long to wait before scheduling an ACK with subtype IDLE (in ms). + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; +unsigned long rxrpc_idle_ack_delay = HZ / 2; /* * Receive window size in packets. This indicates the maximum number of @@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4; /* * Time till packet resend (in milliseconds). */ -unsigned int rxrpc_resend_timeout = 4 * 1000; +unsigned long rxrpc_resend_timeout = 4 * HZ; const s8 rxrpc_ack_priority[] = { [0] = 0, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index be0b9ae13893..0b6609da80b7 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -163,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) case RXRPC_CALL_SERVER_RECV_REQUEST: call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - call->ack_at = call->expire_at; + call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; write_unlock_bh(&call->state_lock); rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, rxrpc_propose_ack_processing_op); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index de5ab327c18a..03e0676db28c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -158,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long now; rxrpc_seq_t seq = sp->hdr.seq; int ret, ix; u8 annotation = RXRPC_TX_ANNO_UNACK; @@ -197,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, break; case RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; - call->ack_at = call->expire_at; + now = jiffies; + WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET); if (call->ackr_reason == RXRPC_ACK_DELAY) call->ackr_reason = 0; - __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, - ktime_get_real()); + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); if (!last) break; /* Fall through */ @@ -223,14 +224,12 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - ktime_t now = ktime_get_real(), resend_at; + unsigned long now = jiffies, resend_at; - resend_at = ktime_add_ms(now, rxrpc_resend_timeout); - - if (ktime_before(resend_at, call->resend_at)) { - call->resend_at = resend_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); - } + resend_at = now + rxrpc_resend_timeout; + WRITE_ONCE(call->resend_at, resend_at); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_send); } rxrpc_free_skb(skb, rxrpc_skb_tx_freed); @@ -513,6 +512,19 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) return -EINVAL; break; + case RXRPC_SET_CALL_TIMEOUT: + if (len & 3 || len < 4 || len > 12) + return -EINVAL; + memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len); + p->call.nr_timeouts = len / 4; + if (p->call.timeouts.hard > INT_MAX / HZ) + return -ERANGE; + if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000) + return -ERANGE; + if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000) + return -ERANGE; + break; + default: return -EINVAL; } @@ -577,11 +589,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) { enum rxrpc_call_state state; struct rxrpc_call *call; + unsigned long now, j; int ret; struct rxrpc_send_params p = { .call.tx_total_len = -1, .call.user_call_ID = 0, + .call.nr_timeouts = 0, .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, @@ -646,6 +660,31 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } } + switch (p.call.nr_timeouts) { + case 3: + j = msecs_to_jiffies(p.call.timeouts.normal); + if (p.call.timeouts.normal > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_rx_timo, j); + /* Fall through */ + case 2: + j = msecs_to_jiffies(p.call.timeouts.idle); + if (p.call.timeouts.idle > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_req_timo, j); + /* Fall through */ + case 1: + if (p.call.timeouts.hard > 0) { + j = msecs_to_jiffies(p.call.timeouts.hard); + now = jiffies; + j += now; + WRITE_ONCE(call->expect_term_by, j); + rxrpc_reduce_call_timer(call, j, now, + rxrpc_timer_set_for_hard); + } + break; + } + state = READ_ONCE(call->state); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, state, call->conn); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 34c706d2f79c..4a7af7aff37d 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -21,6 +21,8 @@ static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; +static const unsigned long one_jiffy = 1; +static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; /* * RxRPC operating parameters. @@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; * information on the individual parameters. */ static struct ctl_table rxrpc_sysctl_table[] = { - /* Values measured in milliseconds */ + /* Values measured in milliseconds but used in jiffies */ { .procname = "req_ack_delay", .data = &rxrpc_requested_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&zero, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "soft_ack_delay", .data = &rxrpc_soft_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_ack_delay", .data = &rxrpc_idle_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, - }, - { - .procname = "resend_timeout", - .data = &rxrpc_resend_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_conn_expiry", .data = &rxrpc_conn_idle_client_expiry, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_conn_fast_expiry", .data = &rxrpc_conn_idle_client_fast_expiry, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, - - /* Values measured in seconds but used in jiffies */ { - .procname = "max_call_lifetime", - .data = &rxrpc_max_call_lifetime, - .maxlen = sizeof(unsigned int), + .procname = "resend_timeout", + .data = &rxrpc_resend_timeout, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, /* Non-time values */ -- cgit v1.2.3 From 8637abaa72fe8200a4a37579e6ec5273db85d2c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: rxrpc: Don't transmit DELAY ACKs immediately on proposal Don't transmit a DELAY ACK immediately on proposal when the Rx window is rotated, but rather defer it to the work function. This means that we have a chance to queue/consume more received packets before we actually send the DELAY ACK, or even cancel it entirely, thereby reducing the number of packets transmitted. We do, however, want to continue sending other types of packet immediately, particularly REQUESTED ACKs, as they may be used for RTT calculation by the other side. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 0b6609da80b7..fad5f42a3abd 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -219,9 +219,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) after_eq(top, call->ackr_seen + 2) || (hard_ack == top && after(hard_ack, call->ackr_consumed))) rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, - true, false, + true, true, rxrpc_propose_ack_rotate_rx); - if (call->ackr_reason) + if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) rxrpc_send_ack_packet(call, false); } } -- cgit v1.2.3 From beb8e5e4f38cc3e4c2839cfc143e0312bf53d0e0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: rxrpc: Express protocol timeouts in terms of RTT Express protocol timeouts for data retransmission and deferred ack generation in terms on RTT rather than specified timeouts once we have sufficient RTT samples. For the moment, this requires just one RTT sample to be able to use this for ack deferral and two for data retransmission. The data retransmission timeout is set at RTT*1.5 and the ACK deferral timeout is set at RTT. Note that the calculated timeout is limited to a minimum of 4ns to make sure it doesn't happen too quickly. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 22 ++++++++++++++++++---- net/rxrpc/sendmsg.c | 7 +++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c14395d5ad8c..da91f16ac77c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -52,7 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; + unsigned long expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; /* Pings are handled specially because we don't want to accidentally @@ -116,7 +116,13 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - now = jiffies; + unsigned long now = jiffies, ack_at; + + if (call->peer->rtt_usage > 0) + ack_at = nsecs_to_jiffies(call->peer->rtt); + else + ack_at = expiry; + ack_at = jiffies + expiry; if (time_before(ack_at, call->ack_at)) { WRITE_ONCE(call->ack_at, ack_at); @@ -160,14 +166,22 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) struct sk_buff *skb; unsigned long resend_at; rxrpc_seq_t cursor, seq, top; - ktime_t now, max_age, oldest, ack_ts; + ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + if (call->peer->rtt_usage > 1) + timeout = ns_to_ktime(call->peer->rtt * 3 / 2); + else + timeout = ms_to_ktime(rxrpc_resend_timeout); + min_timeo = ns_to_ktime((1000000000 / HZ) * 4); + if (ktime_before(timeout, min_timeo)) + timeout = min_timeo; + now = ktime_get_real(); - max_age = ktime_sub_ms(now, rxrpc_resend_timeout * 1000 / HZ); + max_age = ktime_sub(now, timeout); spin_lock_bh(&call->lock); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 03e0676db28c..c56ee54fdd1f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -226,6 +226,13 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, } else { unsigned long now = jiffies, resend_at; + if (call->peer->rtt_usage > 1) + resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); + else + resend_at = rxrpc_resend_timeout; + if (resend_at < 1) + resend_at = 1; + resend_at = now + rxrpc_resend_timeout; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, -- cgit v1.2.3 From bd1fdf8cfdf3fdbccd2b21c33ec649ebd7429af7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: rxrpc: Add a timeout for detecting lost ACKs/lost DATA Add an extra timeout that is set/updated when we send a DATA packet that has the request-ack flag set. This allows us to detect if we don't get an ACK in response to the latest flagged packet. The ACK packet is adjudged to have been lost if it doesn't turn up within 2*RTT of the transmission. If the timeout occurs, we schedule the sending of a PING ACK to find out the state of the other side. If a new DATA packet is ready to go sooner, we cancel the sending of the ping and set the request-ack flag on that instead. If we get back a PING-RESPONSE ACK that indicates a lower tx_top than what we had at the time of the ping transmission, we adjudge all the DATA packets sent between the response tx_top and the ping-time tx_top to have been lost and retransmit immediately. Rather than sending a PING ACK, we could just pick a DATA packet and speculatively retransmit that with request-ack set. It should result in either a REQUESTED ACK or a DUPLICATE ACK which we can then use in lieu the a PING-RESPONSE ACK mentioned above. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 11 +++++++++-- net/rxrpc/ar-internal.h | 6 +++++- net/rxrpc/call_event.c | 26 ++++++++++++++++++++++---- net/rxrpc/call_object.c | 1 + net/rxrpc/input.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/rxrpc/output.c | 20 ++++++++++++++++++-- net/rxrpc/recvmsg.c | 4 ++-- net/rxrpc/sendmsg.c | 2 +- 8 files changed, 98 insertions(+), 12 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 01dcbc2164b5..84ade8b76a19 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -141,6 +141,7 @@ enum rxrpc_timer_trace { rxrpc_timer_exp_ack, rxrpc_timer_exp_hard, rxrpc_timer_exp_idle, + rxrpc_timer_exp_lost_ack, rxrpc_timer_exp_normal, rxrpc_timer_exp_ping, rxrpc_timer_exp_resend, @@ -151,6 +152,7 @@ enum rxrpc_timer_trace { rxrpc_timer_set_for_ack, rxrpc_timer_set_for_hard, rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_lost_ack, rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, @@ -309,6 +311,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_exp_ack, "ExpAck") \ EM(rxrpc_timer_exp_hard, "ExpHrd") \ EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ EM(rxrpc_timer_exp_normal, "ExpNml") \ EM(rxrpc_timer_exp_ping, "ExpPng") \ EM(rxrpc_timer_exp_resend, "ExpRsn") \ @@ -318,6 +321,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_set_for_ack, "SetAck") \ EM(rxrpc_timer_set_for_hard, "SetHrd") \ EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ EM(rxrpc_timer_set_for_resend, "SetRTx") \ @@ -961,6 +965,7 @@ TRACE_EVENT(rxrpc_timer, __field(enum rxrpc_timer_trace, why ) __field(long, now ) __field(long, ack_at ) + __field(long, ack_lost_at ) __field(long, resend_at ) __field(long, ping_at ) __field(long, expect_rx_by ) @@ -974,6 +979,7 @@ TRACE_EVENT(rxrpc_timer, __entry->why = why; __entry->now = now; __entry->ack_at = call->ack_at; + __entry->ack_lost_at = call->ack_lost_at; __entry->resend_at = call->resend_at; __entry->expect_rx_by = call->expect_rx_by; __entry->expect_req_by = call->expect_req_by; @@ -981,10 +987,11 @@ TRACE_EVENT(rxrpc_timer, __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s a=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", + TP_printk("c=%p %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), __entry->ack_at - __entry->now, + __entry->ack_lost_at - __entry->now, __entry->resend_at - __entry->now, __entry->expect_rx_by - __entry->now, __entry->expect_req_by - __entry->now, @@ -1105,7 +1112,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%p r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 548411371048..7e7b817c69f0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -471,6 +471,7 @@ enum rxrpc_call_event { RXRPC_CALL_EV_RESEND, /* Tx resend required */ RXRPC_CALL_EV_PING, /* Ping send required */ RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ + RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ }; /* @@ -515,6 +516,7 @@ struct rxrpc_call { struct rxrpc_sock __rcu *socket; /* socket responsible */ struct mutex user_mutex; /* User access mutex */ unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ unsigned long ping_at; /* When next to send a ping */ unsigned long expect_rx_by; /* When we expect to get a packet by */ @@ -624,6 +626,8 @@ struct rxrpc_call { ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ + rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ + rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ }; /* @@ -1011,7 +1015,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *, bool); +int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index da91f16ac77c..c65666b2f39e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -245,7 +245,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto out; } @@ -310,6 +310,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); + rxrpc_serial_t *send_ack; unsigned long now, next, t; rxrpc_see_call(call); @@ -358,6 +359,13 @@ recheck_state: set_bit(RXRPC_CALL_EV_ACK, &call->events); } + t = READ_ONCE(call->ack_lost_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); + cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); + } + t = READ_ONCE(call->ping_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); @@ -379,15 +387,24 @@ recheck_state: goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { + send_ack = NULL; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { + call->acks_lost_top = call->tx_top; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + send_ack = &call->acks_lost_ping; + } + + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + send_ack) { if (call->ackr_reason) { - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, send_ack); goto recheck_state; } } if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto recheck_state; } @@ -404,6 +421,7 @@ recheck_state: set(call->expect_req_by); set(call->expect_term_by); set(call->ack_at); + set(call->ack_lost_at); set(call->resend_at); set(call->ping_at); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index b305970a9b63..7ee3d6ce5aa2 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -197,6 +197,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) unsigned long j = now + MAX_JIFFY_OFFSET; call->ack_at = j; + call->ack_lost_at = j; call->resend_at = j; call->ping_at = j; call->expect_rx_by = j; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c89647eae86d..23a5e61d8f79 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -630,6 +630,43 @@ found: orig_serial, ack_serial, sent_at, resp_time); } +/* + * Process the response to a ping that we sent to find out if we lost an ACK. + * + * If we got back a ping response that indicates a lower tx_top than what we + * had at the time of the ping transmission, we adjudge all the DATA packets + * sent between the response tx_top and the ping-time tx_top to have been lost. + */ +static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) +{ + rxrpc_seq_t top, bottom, seq; + bool resend = false; + + spin_lock_bh(&call->lock); + + bottom = call->tx_hard_ack + 1; + top = call->acks_lost_top; + if (before(bottom, top)) { + for (seq = bottom; before_eq(seq, top); seq++) { + int ix = seq & RXRPC_RXTX_BUFF_MASK; + u8 annotation = call->rxtx_annotations[ix]; + u8 anno_type = annotation & RXRPC_TX_ANNO_MASK; + + if (anno_type != RXRPC_TX_ANNO_UNACK) + continue; + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = annotation; + resend = true; + } + } + + spin_unlock_bh(&call->lock); + + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); +} + /* * Process a ping response. */ @@ -645,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, smp_rmb(); ping_serial = call->ping_serial; + if (orig_serial == call->acks_lost_ping) + rxrpc_input_check_for_lost_ack(call); + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || before(orig_serial, ping_serial)) return; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f47659c7b224..efe06edce189 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -95,7 +95,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, /* * Send an ACK call packet. */ -int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + rxrpc_serial_t *_serial) { struct rxrpc_connection *conn = NULL; struct rxrpc_ack_buffer *pkt; @@ -165,6 +166,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) ntohl(pkt->ack.firstPacket), ntohl(pkt->ack.serial), pkt->ack.reason, pkt->ack.nAcks); + if (_serial) + *_serial = serial; if (ping) { call->ping_serial = serial; @@ -323,7 +326,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * ACKs if a DATA packet appears to have been lost. */ if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && - (retrans || + (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || + retrans || call->cong_mode == RXRPC_CALL_SLOW_START || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), @@ -370,6 +374,18 @@ done: if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = now; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + if (call->peer->rtt_usage > 1) { + unsigned long nowj = jiffies, ack_lost_at; + + ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); + if (ack_lost_at < 1) + ack_lost_at = 1; + + ack_lost_at += nowj; + WRITE_ONCE(call->ack_lost_at, ack_lost_at); + rxrpc_reduce_call_timer(call, ack_lost_at, nowj, + rxrpc_timer_set_for_lost_ack); + } } } _leave(" = %d [%u]", ret, call->peer->maxdata); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index fad5f42a3abd..cc21e8db25b0 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -148,7 +148,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); } #endif @@ -222,7 +222,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, true, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index c56ee54fdd1f..a1c53ac066a1 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -285,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); if (!skb) { size_t size, chunk, max, space; -- cgit v1.2.3 From 415f44e43282a16ec0808c7ccfd401762e587437 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: rxrpc: Add keepalive for a call We need to transmit a packet every so often to act as a keepalive for the peer (which has a timeout from the last time it received a packet) and also to prevent any intervening firewalls from closing the route. Do this by resetting a timer every time we transmit a packet. If the timer ever expires, we transmit a PING ACK packet and thereby also elicit a PING RESPONSE ACK from the other side - which prevents our last-rx timeout from expiring. The timer is set to 1/6 of the last-rx timeout so that we can detect the other side going away if it misses 6 replies in a row. This is particularly necessary for servers where the processing of the service function may take a significant amount of time. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 ++++++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 10 ++++++++++ net/rxrpc/output.c | 23 +++++++++++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 84ade8b76a19..e98fed6de497 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -141,6 +141,7 @@ enum rxrpc_timer_trace { rxrpc_timer_exp_ack, rxrpc_timer_exp_hard, rxrpc_timer_exp_idle, + rxrpc_timer_exp_keepalive, rxrpc_timer_exp_lost_ack, rxrpc_timer_exp_normal, rxrpc_timer_exp_ping, @@ -152,6 +153,7 @@ enum rxrpc_timer_trace { rxrpc_timer_set_for_ack, rxrpc_timer_set_for_hard, rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_keepalive, rxrpc_timer_set_for_lost_ack, rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, @@ -162,6 +164,7 @@ enum rxrpc_timer_trace { enum rxrpc_propose_ack_trace { rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_keepalive, rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, @@ -311,6 +314,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_exp_ack, "ExpAck") \ EM(rxrpc_timer_exp_hard, "ExpHrd") \ EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_keepalive, "ExpKA ") \ EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ EM(rxrpc_timer_exp_normal, "ExpNml") \ EM(rxrpc_timer_exp_ping, "ExpPng") \ @@ -321,6 +325,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_set_for_ack, "SetAck") \ EM(rxrpc_timer_set_for_hard, "SetHrd") \ EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_keepalive, "KeepAl") \ EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ @@ -330,6 +335,7 @@ enum rxrpc_congest_change { #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ + EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7e7b817c69f0..cdcbc798f921 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -519,6 +519,7 @@ struct rxrpc_call { unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ unsigned long ping_at; /* When next to send a ping */ + unsigned long keepalive_at; /* When next to send a keepalive ping */ unsigned long expect_rx_by; /* When we expect to get a packet by */ unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ unsigned long expect_term_by; /* When we expect call termination by */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c65666b2f39e..bda952ffe6a6 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -366,6 +366,15 @@ recheck_state: set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); } + t = READ_ONCE(call->keepalive_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); + cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, + rxrpc_propose_ack_ping_for_keepalive); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + t = READ_ONCE(call->ping_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); @@ -423,6 +432,7 @@ recheck_state: set(call->ack_at); set(call->ack_lost_at); set(call->resend_at); + set(call->keepalive_at); set(call->ping_at); now = jiffies; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index efe06edce189..42410e910aff 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -32,6 +32,24 @@ struct rxrpc_abort_buffer { __be32 abort_code; }; +/* + * Arrange for a keepalive ping a certain time after we last transmitted. This + * lets the far side know we're still interested in this call and helps keep + * the route through any intervening firewall open. + * + * Receiving a response to the ping will prevent the ->expect_rx_by timer from + * expiring. + */ +static void rxrpc_set_keepalive(struct rxrpc_call *call) +{ + unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; + + keepalive_at += now; + WRITE_ONCE(call->keepalive_at, keepalive_at); + rxrpc_reduce_call_timer(call, keepalive_at, now, + rxrpc_timer_set_for_keepalive); +} + /* * Fill out an ACK packet. */ @@ -205,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, call->ackr_seen = top; spin_unlock_bh(&call->lock); } + + rxrpc_set_keepalive(call); } out: @@ -388,6 +408,9 @@ done: } } } + + rxrpc_set_keepalive(call); + _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; -- cgit v1.2.3 From f859ab61875978eeaa539740ff7f7d91f5d60006 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: rxrpc: Fix service endpoint expiry RxRPC service endpoints expire like they're supposed to by the following means: (1) Mark dead rxrpc_net structs (with ->live) rather than twiddling the global service conn timeout, otherwise the first rxrpc_net struct to die will cause connections on all others to expire immediately from then on. (2) Mark local service endpoints for which the socket has been closed (->service_closed) so that the expiration timeout can be much shortened for service and client connections going through that endpoint. (3) rxrpc_put_service_conn() needs to schedule the reaper when the usage count reaches 1, not 0, as idle conns have a 1 count. (4) The accumulator for the earliest time we might want to schedule for should be initialised to jiffies + MAX_JIFFY_OFFSET, not ULONG_MAX as the comparison functions use signed arithmetic. (5) Simplify the expiration handling, adding the expiration value to the idle timestamp each time rather than keeping track of the time in the past before which the idle timestamp must go to be expired. This is much easier to read. (6) Ignore the timeouts if the net namespace is dead. (7) Restart the service reaper work item rather the client reaper. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 13 +++++++++++++ net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/conn_client.c | 2 ++ net/rxrpc/conn_object.c | 42 ++++++++++++++++++++++++------------------ net/rxrpc/net_ns.c | 3 +++ 6 files changed, 47 insertions(+), 18 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e98fed6de497..36cb50c111a6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -49,6 +49,7 @@ enum rxrpc_conn_trace { rxrpc_conn_put_client, rxrpc_conn_put_service, rxrpc_conn_queued, + rxrpc_conn_reap_service, rxrpc_conn_seen, }; @@ -221,6 +222,7 @@ enum rxrpc_congest_change { EM(rxrpc_conn_put_client, "PTc") \ EM(rxrpc_conn_put_service, "PTs") \ EM(rxrpc_conn_queued, "QUE") \ + EM(rxrpc_conn_reap_service, "RPs") \ E_(rxrpc_conn_seen, "SEE") #define rxrpc_client_traces \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c0cdcf980ffc..abb524c2b8f8 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -867,6 +867,19 @@ static int rxrpc_release_sock(struct sock *sk) sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; + /* We want to kill off all connections from a service socket + * as fast as possible because we can't share these; client + * sockets, on the other hand, can share an endpoint. + */ + switch (sk->sk_state) { + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: + case RXRPC_SERVER_LISTENING: + case RXRPC_SERVER_LISTEN_DISABLED: + rx->local->service_closed = true; + break; + } + spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cdcbc798f921..a0082c407005 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -84,6 +84,7 @@ struct rxrpc_net { unsigned int nr_client_conns; unsigned int nr_active_client_conns; bool kill_all_client_conns; + bool live; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ struct list_head waiting_client_conns; @@ -265,6 +266,7 @@ struct rxrpc_local { rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ bool dead; + bool service_closed; /* Service socket closed */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -881,6 +883,7 @@ void rxrpc_process_connection(struct work_struct *); * conn_object.c */ extern unsigned int rxrpc_connection_expiry; +extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 97f6a8de4845..785dfdb9fef1 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -1079,6 +1079,8 @@ next: expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; + if (conn->params.local->service_closed) + expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index a01a3791f06a..726eda6140ae 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -20,7 +20,8 @@ /* * Time till a connection expires after last use (in seconds). */ -unsigned int rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; static void rxrpc_destroy_connection(struct rcu_head *); @@ -321,7 +322,7 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 0) { + if (n == 1) { rxnet = conn->params.local->rxnet; rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); } @@ -363,15 +364,14 @@ void rxrpc_service_connection_reaper(struct work_struct *work) struct rxrpc_net *rxnet = container_of(to_delayed_work(work), struct rxrpc_net, service_conn_reaper); - unsigned long reap_older_than, earliest, idle_timestamp, now; + unsigned long expire_at, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); now = jiffies; - reap_older_than = now - rxrpc_connection_expiry * HZ; - earliest = ULONG_MAX; + earliest = now + MAX_JIFFY_OFFSET; write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { @@ -381,15 +381,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - idle_timestamp = READ_ONCE(conn->idle_timestamp); - _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), - (long)reap_older_than - (long)idle_timestamp); - - if (time_after(idle_timestamp, reap_older_than)) { - if (time_before(idle_timestamp, earliest)) - earliest = idle_timestamp; - continue; + if (rxnet->live) { + idle_timestamp = READ_ONCE(conn->idle_timestamp); + expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; + if (conn->params.local->service_closed) + expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; + + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)expire_at - (long)now); + + if (time_before(now, expire_at)) { + if (time_before(expire_at, earliest)) + earliest = expire_at; + continue; + } } /* The usage count sits at 1 whilst the object is unused on the @@ -397,6 +403,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) */ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) continue; + trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0); if (rxrpc_conn_is_client(conn)) BUG(); @@ -407,10 +414,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work) } write_unlock(&rxnet->conn_lock); - if (earliest != ULONG_MAX) { - _debug("reschedule reaper %ld", (long) earliest - now); + if (earliest != now + MAX_JIFFY_OFFSET) { + _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, + rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, earliest - now); } @@ -439,7 +446,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) rxrpc_destroy_all_client_connections(rxnet); - rxrpc_connection_expiry = 0; cancel_delayed_work(&rxnet->client_conn_reaper); rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); flush_workqueue(rxrpc_workqueue); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 7edceb8522f5..684c51d600c7 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -22,6 +22,7 @@ static __net_init int rxrpc_init_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); int ret; + rxnet->live = true; get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); rxnet->epoch |= RXRPC_RANDOM_EPOCH; @@ -60,6 +61,7 @@ static __net_init int rxrpc_init_net(struct net *net) return 0; err_proc: + rxnet->live = false; return ret; } @@ -70,6 +72,7 @@ static __net_exit void rxrpc_exit_net(struct net *net) { struct rxrpc_net *rxnet = rxrpc_net(net); + rxnet->live = false; rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_locals(rxnet); -- cgit v1.2.3 From 3d18cbb7fd0cfdf0b2ca18139950a4b0c1a0a220 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: rxrpc: Fix conn expiry timers Fix the rxrpc connection expiry timers so that connections for closed AF_RXRPC sockets get deleted in a more timely fashion, freeing up the transport UDP port much more quickly. (1) Replace the delayed work items with work items plus timers so that timer_reduce() can be used to shorten them and so that the timer doesn't requeue the work item if the net namespace is dead. (2) Don't use queue_delayed_work() as that won't alter the timeout if the timer is already running. (3) Don't rearm the timers if the network namespace is dead. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 2 ++ net/rxrpc/ar-internal.h | 6 ++++-- net/rxrpc/conn_client.c | 30 +++++++++++++++++++----------- net/rxrpc/conn_object.c | 28 +++++++++++++++++----------- net/rxrpc/net_ns.c | 30 ++++++++++++++++++++++++++---- 5 files changed, 68 insertions(+), 28 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index abb524c2b8f8..8f7cf4c042be 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -895,6 +895,8 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); + rxrpc_queue_work(&rx->local->rxnet->service_conn_reaper); + rxrpc_queue_work(&rx->local->rxnet->client_conn_reaper); rxrpc_put_local(rx->local); rx->local = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a0082c407005..416688381eb7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -79,7 +79,8 @@ struct rxrpc_net { struct list_head conn_proc_list; /* List of conns in this namespace for proc */ struct list_head service_conns; /* Service conns in this namespace */ rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ - struct delayed_work service_conn_reaper; + struct work_struct service_conn_reaper; + struct timer_list service_conn_reap_timer; unsigned int nr_client_conns; unsigned int nr_active_client_conns; @@ -90,7 +91,8 @@ struct rxrpc_net { struct list_head waiting_client_conns; struct list_head active_client_conns; struct list_head idle_client_conns; - struct delayed_work client_conn_reaper; + struct work_struct client_conn_reaper; + struct timer_list client_conn_reap_timer; struct list_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 785dfdb9fef1..7f74ca3059f8 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -691,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work); + rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); rxrpc_cull_active_client_conns(rxnet); ret = rxrpc_get_client_conn(call, cp, srx, gfp); @@ -756,6 +756,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) } } +/* + * Set the reap timer. + */ +static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) +{ + unsigned long now = jiffies; + unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; + + if (rxnet->live) + timer_reduce(&rxnet->client_conn_reap_timer, reap_at); +} + /* * Disconnect a client call. */ @@ -896,9 +908,7 @@ idle_connection: list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); if (rxnet->idle_client_conns.next == &conn->cache_link && !rxnet->kill_all_client_conns) - queue_delayed_work(rxrpc_workqueue, - &rxnet->client_conn_reaper, - rxrpc_conn_idle_client_expiry); + rxrpc_set_client_reap_timer(rxnet); } else { trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; @@ -1036,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) { struct rxrpc_connection *conn; struct rxrpc_net *rxnet = - container_of(to_delayed_work(work), - struct rxrpc_net, client_conn_reaper); + container_of(work, struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; bool did_discard = false; @@ -1116,9 +1125,8 @@ not_yet_expired: */ _debug("not yet"); if (!rxnet->kill_all_client_conns) - queue_delayed_work(rxrpc_workqueue, - &rxnet->client_conn_reaper, - conn_expires_at - now); + timer_reduce(&rxnet->client_conn_reap_timer, + conn_expires_at); out: spin_unlock(&rxnet->client_conn_cache_lock); @@ -1138,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) rxnet->kill_all_client_conns = true; spin_unlock(&rxnet->client_conn_cache_lock); - cancel_delayed_work(&rxnet->client_conn_reaper); + del_timer_sync(&rxnet->client_conn_reap_timer); - if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0)) + if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) _debug("destroy: queue failed"); _leave(""); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 726eda6140ae..1aad04a32d5e 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -310,22 +310,30 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) return conn; } +/* + * Set the service connection reap timer. + */ +static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, + unsigned long reap_at) +{ + if (rxnet->live) + timer_reduce(&rxnet->service_conn_reap_timer, reap_at); +} + /* * Release a service connection */ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet; const void *here = __builtin_return_address(0); int n; n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 1) { - rxnet = conn->params.local->rxnet; - rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); - } + if (n == 1) + rxrpc_set_service_reap_timer(conn->params.local->rxnet, + jiffies + rxrpc_connection_expiry); } /* @@ -362,8 +370,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; struct rxrpc_net *rxnet = - container_of(to_delayed_work(work), - struct rxrpc_net, service_conn_reaper); + container_of(work, struct rxrpc_net, service_conn_reaper); unsigned long expire_at, earliest, idle_timestamp, now; LIST_HEAD(graveyard); @@ -417,8 +424,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (earliest != now + MAX_JIFFY_OFFSET) { _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, - earliest - now); + rxrpc_set_service_reap_timer(rxnet, earliest); } while (!list_empty(&graveyard)) { @@ -446,8 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) rxrpc_destroy_all_client_connections(rxnet); - cancel_delayed_work(&rxnet->client_conn_reaper); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); + del_timer_sync(&rxnet->service_conn_reap_timer); + rxrpc_queue_work(&rxnet->service_conn_reaper); flush_workqueue(rxrpc_workqueue); write_lock(&rxnet->conn_lock); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 684c51d600c7..f18c9248e0d4 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -14,6 +14,24 @@ unsigned int rxrpc_net_id; +static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, client_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->client_conn_reaper); +} + +static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, service_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->service_conn_reaper); +} + /* * Initialise a per-network namespace record. */ @@ -32,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net) INIT_LIST_HEAD(&rxnet->conn_proc_list); INIT_LIST_HEAD(&rxnet->service_conns); rwlock_init(&rxnet->conn_lock); - INIT_DELAYED_WORK(&rxnet->service_conn_reaper, - rxrpc_service_connection_reaper); + INIT_WORK(&rxnet->service_conn_reaper, + rxrpc_service_connection_reaper); + timer_setup(&rxnet->service_conn_reap_timer, + rxrpc_service_conn_reap_timeout, 0); rxnet->nr_client_conns = 0; rxnet->nr_active_client_conns = 0; @@ -43,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net) INIT_LIST_HEAD(&rxnet->waiting_client_conns); INIT_LIST_HEAD(&rxnet->active_client_conns); INIT_LIST_HEAD(&rxnet->idle_client_conns); - INIT_DELAYED_WORK(&rxnet->client_conn_reaper, - rxrpc_discard_expired_client_conns); + INIT_WORK(&rxnet->client_conn_reaper, + rxrpc_discard_expired_client_conns); + timer_setup(&rxnet->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); INIT_LIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); -- cgit v1.2.3 From febae9bc946de2c85e324b20bb7dc7b2d4e49d37 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Mon, 27 Mar 2017 16:20:57 +0100 Subject: drm: hdlcd: Update PM code to save/restore console. Update the PM code to suspend/resume the fbdev_cma console. Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 764d0c83710c..45b174fea36b 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -432,9 +433,11 @@ static int __maybe_unused hdlcd_pm_suspend(struct device *dev) return 0; drm_kms_helper_poll_disable(drm); + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 1); hdlcd->state = drm_atomic_helper_suspend(drm); if (IS_ERR(hdlcd->state)) { + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 0); drm_kms_helper_poll_enable(drm); return PTR_ERR(hdlcd->state); } @@ -451,8 +454,8 @@ static int __maybe_unused hdlcd_pm_resume(struct device *dev) return 0; drm_atomic_helper_resume(drm, hdlcd->state); + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 0); drm_kms_helper_poll_enable(drm); - pm_runtime_set_active(dev); return 0; } -- cgit v1.2.3 From 860ec7c6e21cecca8c508a66ee211b56d87f5bd7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 23 Nov 2017 19:26:20 +0100 Subject: s390/debug: use pK for kernel pointers the s390 debug feature (/sys/kernel/debug/s390dbf/) shows the kernel pointer of the calling function even for kptr_restrict == 2. Let us use pK instead of p. This hides the kernel addresses for kptr_restrict == 2: root@host $ echo 2 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 0000000000000000 snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c root@host $ echo 1 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 000000000071171c snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c root@host $ echo 0 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 000000000071171c snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 58b9e127b615..80e974adb9e8 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1392,7 +1392,7 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %p ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", area, sec, usec, level, except_str, entry->id.fields.cpuid, (void *)caller); return rc; -- cgit v1.2.3 From 6a55d2cdf1bc140665cbfaed14de79acaf3758c4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:00 +0100 Subject: s390: block: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/block/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Stefan Haberland Cc: Jan Hoeppner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 1 + drivers/s390/block/dasd_devmap.c | 1 + drivers/s390/block/dasd_diag.c | 1 + drivers/s390/block/dasd_eckd.c | 1 + drivers/s390/block/dasd_fba.c | 1 + drivers/s390/block/dcssblk.c | 1 + drivers/s390/block/scm_blk.c | 1 + drivers/s390/block/xpram.c | 1 + 8 files changed, 8 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 29f35e29d480..fdb3b133e5e0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index c95a4784c191..e7cd28ff1984 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 98fb28e49d2c..f035c2f25d35 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Based on.......: linux/drivers/s390/block/mdisk.c diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8eafcd5fa004..1a41ef496338 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 6168ccdb389c..a6b132f7e869 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Bugreports.to..: diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 7abb240847c0..6aaefb780436 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * dcssblk.c -- the S/390 block driver for dcss memory * diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index eb51893c74a4..b4130c7880d8 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Block driver for s390 storage class memory. * diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 571a0709e1e5..2a6334ca750e 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Xpram.c -- the S/390 expanded memory RAM-disk * -- cgit v1.2.3 From 812141a9fe61446c948a71456c58090ac11a6d14 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:01 +0100 Subject: s390: crypto: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/crypto/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Harald Freudenberger Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 1 + drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/pkey_api.c | 1 + drivers/s390/crypto/zcrypt_api.c | 1 + drivers/s390/crypto/zcrypt_api.h | 1 + drivers/s390/crypto/zcrypt_card.c | 1 + drivers/s390/crypto/zcrypt_cca_key.h | 1 + drivers/s390/crypto/zcrypt_cex2a.c | 1 + drivers/s390/crypto/zcrypt_cex2a.h | 1 + drivers/s390/crypto/zcrypt_cex4.c | 1 + drivers/s390/crypto/zcrypt_error.h | 1 + drivers/s390/crypto/zcrypt_msgtype50.c | 1 + drivers/s390/crypto/zcrypt_msgtype50.h | 1 + drivers/s390/crypto/zcrypt_msgtype6.c | 1 + drivers/s390/crypto/zcrypt_msgtype6.h | 1 + drivers/s390/crypto/zcrypt_pcixcc.c | 1 + drivers/s390/crypto/zcrypt_pcixcc.h | 1 + drivers/s390/crypto/zcrypt_queue.c | 1 + 18 files changed, 18 insertions(+) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index faeba9db3d95..658626039171 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright IBM Corp. 2006, 2012 * Author(s): Cornelia Huck diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 7e45c4d08cad..b43e4d650940 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright IBM Corp. 2006, 2012 * Author(s): Cornelia Huck diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 8dda5bb34a2f..869ace34972d 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * pkey device driver * diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index a9a56aa9c26b..620407ac2faa 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 73541a798db7..4883d51521fd 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index f85dacf1c284..c0e7072f18a9 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index 12cff6262566..ebb9fbcb9bc0 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index b97c5d5ee5a4..e00ffa44f85c 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 0dce4b9af184..6f6830e0bab3 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index e2eebc775a37..f305538334ad 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * Author(s): Holger Dengler diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 13df60209ed3..8d8892d14bd0 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index db5bde47dfb0..8fdf5d65c4d2 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index 5cc280318ee7..f6ce799c2239 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 785620d30504..3a19b69b846b 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 7a0d5b57821f..52d140889377 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 600604782b65..9963018a6bbc 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index eacafc8962f2..a85778d5763c 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 4742be0eec24..5103727b4633 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * -- cgit v1.2.3 From 724117b77bbe2b28f27728d58a432ed22630e33f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:02 +0100 Subject: s390: cio: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/cio/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Sebastian Ott Cc: Peter Oberparleiter Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Cornelia Huck Cc: Dong Jia Shi Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ccwgroup.c | 1 + drivers/s390/cio/chp.c | 1 + drivers/s390/cio/chsc.c | 1 + drivers/s390/cio/chsc_sch.c | 1 + drivers/s390/cio/cio.c | 1 + drivers/s390/cio/cmf.c | 1 + drivers/s390/cio/css.c | 1 + drivers/s390/cio/device.c | 1 + drivers/s390/cio/device_fsm.c | 1 + drivers/s390/cio/device_ops.c | 1 + drivers/s390/cio/eadm_sch.c | 1 + drivers/s390/cio/isc.c | 1 + drivers/s390/cio/qdio_main.c | 1 + drivers/s390/cio/qdio_setup.c | 1 + drivers/s390/cio/scm.c | 1 + drivers/s390/cio/vfio_ccw_drv.c | 1 + 16 files changed, 16 insertions(+) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index e2f7b6e93efd..bfec1485ca23 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bus driver for ccwgroup * diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index f4166f80c4d4..5c94a3aec4dd 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 1999, 2010 * Author(s): Cornelia Huck (cornelia.huck@de.ibm.com) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 7b0b295b2313..c08fc5a8df0c 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S/390 common I/O routines -- channel subsystem call * diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 8e7e19b9e92c..0015729d917d 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for s390 chsc subchannels * diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 89216174fcbb..987bf9a8c9f7 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S/390 common I/O routines -- low level i/o calls * diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 7d59230e88bb..320ce009ba41 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Linux on zSeries Channel Measurement Facility support * diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index d3e504c3c362..f4c2e71e7a56 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * driver for channel subsystem * diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 318d8269f5de..e4ed6c651bca 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * bus driver for ccw devices * diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index dd7d79d30edc..1319122e9d12 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * finite state machine for device handling * diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index cf8c4ac6323a..aa125d92cc65 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Copyright IBM Corp. 2002, 2009 * diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index ce16e4f45d44..53468ae64b99 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for s390 eadm subchannels * diff --git a/drivers/s390/cio/isc.c b/drivers/s390/cio/isc.c index c592087be0f1..77fde9f5ea8b 100644 --- a/drivers/s390/cio/isc.c +++ b/drivers/s390/cio/isc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Functions for registration of I/O interruption subclasses on s390. * diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index ed4852fab44b..59b4a3370cd5 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux for s390 qdio support, buffer handling, qdio API and module support. * diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 9ae1380cbc31..98f3cfdc0d02 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * qdio queue initialization * diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index 1fa53ecdc2aa..6bca1d5455d4 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Recognize and maintain s390 storage class memory. * diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 82f05c4b8c52..ea6a2d0b2894 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * VFIO based Physical Subchannel device driver * -- cgit v1.2.3 From 6f05e69e44d8167d3c97c3b3b657a94f3cb41d71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:03 +0100 Subject: s390: char: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/char/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/fs3270.c | 1 + drivers/s390/char/hmcdrv_mod.c | 1 + drivers/s390/char/monreader.c | 1 + drivers/s390/char/monwriter.c | 1 + drivers/s390/char/raw3270.c | 1 + drivers/s390/char/sclp_async.c | 1 + drivers/s390/char/tape_34xx.c | 1 + drivers/s390/char/tape_3590.c | 1 + drivers/s390/char/tape_class.c | 1 + drivers/s390/char/tape_core.c | 1 + drivers/s390/char/tty3270.c | 1 + drivers/s390/char/vmlogrdr.c | 1 + drivers/s390/char/vmur.c | 1 + drivers/s390/char/zcore.c | 1 + 14 files changed, 14 insertions(+) diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index c4518168fd02..61822480a2a0 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - fullscreen driver. * diff --git a/drivers/s390/char/hmcdrv_mod.c b/drivers/s390/char/hmcdrv_mod.c index 251a318a9b75..1447d0887225 100644 --- a/drivers/s390/char/hmcdrv_mod.c +++ b/drivers/s390/char/hmcdrv_mod.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * HMC Drive DVD Module * diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index 027ac6ae5eea..bf4ab4efed73 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Character device driver for reading z/VM *MONITOR service records. * diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 571a7e352755..76c158c41510 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Character device driver for writing z/VM *MONITOR service records. * diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 5d4f053d7c38..f8cd2935fbfd 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - core functions. * diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c index 19c25427f27f..ee6f3b563728 100644 --- a/drivers/s390/char/sclp_async.c +++ b/drivers/s390/char/sclp_async.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Enable Asynchronous Notification via SCLP. * diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index de69f0ddc321..6d73ee3f827a 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * tape device discipline for 3480/3490 tapes. * diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index e352047ed9f7..37e65a05517f 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * tape device discipline for 3590 tapes. * diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c index e7d23048d3f0..a07102472ce9 100644 --- a/drivers/s390/char/tape_class.c +++ b/drivers/s390/char/tape_class.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2004 * diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 32503a60ee85..8d3370da2dfc 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * basic function of the tape device driver * diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index e417ccd9e299..1c98023cffd4 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - tty functions. * diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 62559dc0169f..069b9ef08206 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * character device driver for reading z/VM system service records * diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index fa90ef05afc0..52aa89424318 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux driver for System z and s390 unit record devices * (z/VM virtual punch, reader, printer) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index aaed778f67c4..dd86559ce7fb 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * zcore module to export memory content and register sets for creating system * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same -- cgit v1.2.3 From ab9953ff0f2e37092dc247ddd7c62fe6f03618dc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:04 +0100 Subject: s390: net: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/net/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Julian Wiedmann Cc: Ursula Braun Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/net/ctcm_main.c | 1 + drivers/s390/net/fsm.c | 1 + drivers/s390/net/lcs.c | 1 + drivers/s390/net/netiucv.c | 1 + drivers/s390/net/qeth_core_main.c | 1 + drivers/s390/net/qeth_core_sys.c | 1 + drivers/s390/net/qeth_l2_main.c | 1 + drivers/s390/net/qeth_l3_main.c | 1 + drivers/s390/net/smsgiucv.c | 1 + drivers/s390/net/smsgiucv_app.c | 1 + 10 files changed, 10 insertions(+) diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index be9f17218531..7ce98b70cad3 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2001, 2009 * Author(s): diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c index 8c14c6c3ad3d..f0c7c182b077 100644 --- a/drivers/s390/net/fsm.c +++ b/drivers/s390/net/fsm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /** * A generic FSM based on fsm used in isdn4linux * diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index e131a03262ad..36899e94d268 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Linux for S/390 Lan Channel Station Network Driver * diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index b9c7c1e61da2..150053131baf 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * IUCV network driver * diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 49b9efeba1bd..98a7f84540ab 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index b22ed2a57acd..ae81534de912 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d2537c09126d..93d7e345d180 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index aadd384316a3..0f8c12738b06 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index a851d34c642b..93405e0bad3c 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * IUCV special message driver * diff --git a/drivers/s390/net/smsgiucv_app.c b/drivers/s390/net/smsgiucv_app.c index 32515a201bbc..0a263999f7ae 100644 --- a/drivers/s390/net/smsgiucv_app.c +++ b/drivers/s390/net/smsgiucv_app.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Deliver z/VM CP special messages (SMSG) as uevents. * -- cgit v1.2.3 From 40bf411ee6d1f2d9833486a9d4318734180a997e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:05 +0100 Subject: s390: scsi: zfcp_aux: add SPDX identifier It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/scsi/zfcp_aux.c file with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Steffen Maier Cc: Benjamin Block Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/scsi/zfcp_aux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 84752152d41f..a3a8c8d9d717 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * zfcp device driver * -- cgit v1.2.3 From 31fb16730a0d8baf0c04c93ebf606340c9ea8393 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:06 +0100 Subject: s390: virtio: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/virtio/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Cornelia Huck Cc: Halil Pasic Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/virtio/Makefile | 1 + drivers/s390/virtio/virtio_ccw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index f68af1f317f1..7c6f0c8db883 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # Makefile for kvm guest drivers on s390 # # Copyright IBM Corp. 2008 diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index b18fe2014cf2..56346caadb21 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ccw based virtio transport * -- cgit v1.2.3 From 0b622e60bc6c4eca75d517b10f15914ecd58e6b1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:07 +0100 Subject: s390: crypto: Remove redundant license text Now that the SPDX tag is in all drivers/s390/crypto/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Harald Freudenberger Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 14 -------------- drivers/s390/crypto/ap_bus.h | 14 -------------- drivers/s390/crypto/pkey_api.c | 5 ----- drivers/s390/crypto/zcrypt_api.c | 14 -------------- drivers/s390/crypto/zcrypt_api.h | 14 -------------- drivers/s390/crypto/zcrypt_card.c | 10 ---------- drivers/s390/crypto/zcrypt_cca_key.h | 14 -------------- drivers/s390/crypto/zcrypt_cex2a.c | 14 -------------- drivers/s390/crypto/zcrypt_cex2a.h | 14 -------------- drivers/s390/crypto/zcrypt_error.h | 14 -------------- drivers/s390/crypto/zcrypt_msgtype50.c | 14 -------------- drivers/s390/crypto/zcrypt_msgtype50.h | 14 -------------- drivers/s390/crypto/zcrypt_msgtype6.c | 14 -------------- drivers/s390/crypto/zcrypt_msgtype6.h | 14 -------------- drivers/s390/crypto/zcrypt_pcixcc.c | 14 -------------- drivers/s390/crypto/zcrypt_pcixcc.h | 14 -------------- drivers/s390/crypto/zcrypt_queue.c | 10 ---------- 17 files changed, 221 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 658626039171..48d55dc9e986 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -8,20 +8,6 @@ * Holger Dengler * * Adjunct processor bus. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "ap" diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index b43e4d650940..e0827eaa42f1 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -8,20 +8,6 @@ * Holger Dengler * * Adjunct processor bus header file. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _AP_BUS_H_ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 869ace34972d..e7c2e4f9529a 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -4,11 +4,6 @@ * * Copyright IBM Corp. 2017 * Author(s): Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #define KMSG_COMPONENT "pkey" diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 620407ac2faa..ce15f101ee28 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -11,20 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 4883d51521fd..9fff8912f6e3 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -11,20 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_API_H_ diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index c0e7072f18a9..233e1e695208 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -11,16 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index ebb9fbcb9bc0..011d61d8a4ae 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_CCA_KEY_H_ diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index e00ffa44f85c..e701194d3611 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 6f6830e0bab3..c3c116777c93 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_CEX2A_H_ diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 8d8892d14bd0..01598d83c60a 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_ERROR_H_ diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 8fdf5d65c4d2..afe1b2bcd7ec 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "zcrypt" diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index f6ce799c2239..0a36545cfb8e 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_MSGTYPE50_H_ diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 3a19b69b846b..f54bef4a928e 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "zcrypt" diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 52d140889377..d314f4525518 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_MSGTYPE6_H_ diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 9963018a6bbc..159b0a0dd211 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index a85778d5763c..d678a3af83a7 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_PCIXCC_H_ diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 5103727b4633..720434e18007 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -11,16 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include -- cgit v1.2.3 From 13d1d559f04a893b4a32ec04fb9d7210ec4d9597 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:08 +0100 Subject: s390: drivers: Remove redundant license text Now that the SPDX tag is in all drivers/s390/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Sebastian Ott Cc: Peter Oberparleiter Cc: Julian Wiedmann Cc: Ursula Braun Cc: Cornelia Huck Cc: Halil Pasic Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/zcore.c | 1 - drivers/s390/cio/cmf.c | 14 -------------- drivers/s390/cio/css.c | 2 -- drivers/s390/cio/device.c | 2 -- drivers/s390/cio/device_ops.c | 2 -- drivers/s390/net/lcs.c | 14 -------------- drivers/s390/net/netiucv.c | 15 --------------- drivers/s390/net/smsgiucv.c | 14 -------------- drivers/s390/virtio/Makefile | 4 ---- drivers/s390/virtio/virtio_ccw.c | 4 ---- 10 files changed, 72 deletions(-) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index dd86559ce7fb..4369662cfff5 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -8,7 +8,6 @@ * * Copyright IBM Corp. 2003, 2008 * Author(s): Michael Holzheu - * License: GPL */ #define KMSG_COMPONENT "zdump" diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 320ce009ba41..5e495c62cfa7 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -8,20 +8,6 @@ * Cornelia Huck * * original idea from Natarajan Krishnaswami - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index f4c2e71e7a56..0f11dce6e224 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -6,8 +6,6 @@ * * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) - * - * License: GPL */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e4ed6c651bca..75a245f38e2e 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -6,8 +6,6 @@ * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * License: GPL */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index aa125d92cc65..1caf6a398760 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -4,8 +4,6 @@ * * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) - * - * License: GPL */ #include #include diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 36899e94d268..92ae84a927fc 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -8,20 +8,6 @@ * Rewritten by * Frank Pavlic and * Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "lcs" diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 150053131baf..5ce2424ca729 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -19,21 +19,6 @@ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) * Alan Altmark (Alan_Altmark@us.ibm.com) Sept. 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #define KMSG_COMPONENT "netiucv" diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index 93405e0bad3c..3b0c8b8a7634 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -5,20 +5,6 @@ * Copyright IBM Corp. 2003, 2009 * * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index 7c6f0c8db883..2dc4d9aab634 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -2,9 +2,5 @@ # Makefile for kvm guest drivers on s390 # # Copyright IBM Corp. 2008 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License (version 2 only) -# as published by the Free Software Foundation. obj-$(CONFIG_S390_GUEST) += virtio_ccw.o diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 56346caadb21..ba2e0856d22c 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ -- cgit v1.2.3 From b1c0de0e51504b84cf4a3eb46541e98557bb460d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Nov 2017 15:19:32 +0100 Subject: s390: sthyi: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/kernel/sthyi file with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sthyi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 12981e197f01..db163cf3606a 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * store hypervisor information instruction emulation functions. * -- cgit v1.2.3 From a17ae4c3a6add7579e9962df5dd12cb1f3bed431 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:32 +0100 Subject: s390: kernel: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/kernel/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/kprobes.c | 1 + arch/s390/kernel/lgr.c | 1 + arch/s390/kernel/module.c | 1 + arch/s390/kernel/nmi.c | 1 + arch/s390/kernel/perf_cpum_cf.c | 1 + arch/s390/kernel/perf_cpum_sf.c | 1 + arch/s390/kernel/perf_event.c | 1 + arch/s390/kernel/setup.c | 1 + arch/s390/kernel/stacktrace.c | 1 + arch/s390/kernel/time.c | 1 + arch/s390/kernel/topology.c | 1 + arch/s390/kernel/vdso.c | 1 + arch/s390/kernel/vdso32/clock_getres.S | 1 + arch/s390/kernel/vdso32/clock_gettime.S | 1 + arch/s390/kernel/vdso32/gettimeofday.S | 1 + arch/s390/kernel/vdso64/clock_getres.S | 1 + arch/s390/kernel/vdso64/clock_gettime.S | 1 + arch/s390/kernel/vdso64/gettimeofday.S | 1 + arch/s390/kernel/vtime.c | 1 + 22 files changed, 22 insertions(+) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 0a0e14335a04..b2c68fbf2634 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Disassemble s390 instructions. * diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2aa545dca4d5..5b23c4f6e50c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack dumping functions * diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 310e59e6eb4b..8ecb8726ac47 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ipl/reipl/dump support for Linux on s390. * diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1a6521af1751..ee055f7bdb10 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel Probes (KProbes) * diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index bf9622f0e6b1..452502f9a0d9 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux Guest Relocation (LGR) detection * diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 7b87991416fd..10d104589978 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel module help for s390. * diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 6ff169253cae..c7a627620e5e 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Machine check handler * diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 746d03423333..8f6296860186 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x - CPU-measurement Counter Facility * diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 227b38bd82c9..6e404caac856 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for the System z CPU-measurement Sampling Facility * diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 93a386f4a3b5..0ef308851374 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x * diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 090053cf279b..793da97f9a6e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 version * Copyright IBM Corp. 1999, 2012 diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index e66687dc6144..460dcfba7d4e 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack trace management functions * diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5cbd52169348..44c012c866d2 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Time of day based timer functions. * diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index f9b393d4a078..4d5b65e527b5 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Heiko Carstens diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 39a218703c50..88f6fab68eb2 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * vdso setup for s390 * diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index eca3f001f081..eb9f22cbfd47 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index a5769b83d90e..b642c0382238 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 63b86dceb0bf..8a89b54246d7 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index c8513deb8c66..416bf711c2aa 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 5d7b56b49458..d2dee331b7cd 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index b02e62f3bc12..cf62a1611054 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index dd7178fbb4f3..f24395a01918 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Virtual cpu timer based timer functions. * -- cgit v1.2.3 From 20a884f5e0180a6f67bd650bdb9d703c415c1436 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:34 +0100 Subject: s390: crypto: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/crypto/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Herbert Xu Cc: "David S. Miller" Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 1 + arch/s390/crypto/arch_random.c | 1 + arch/s390/crypto/crc32-vx.c | 1 + arch/s390/crypto/des_s390.c | 1 + arch/s390/crypto/ghash_s390.c | 1 + arch/s390/crypto/paes_s390.c | 1 + arch/s390/crypto/prng.c | 1 + arch/s390/crypto/sha.h | 1 + arch/s390/crypto/sha256_s390.c | 1 + arch/s390/crypto/sha512_s390.c | 1 + arch/s390/crypto/sha_common.c | 1 + 11 files changed, 11 insertions(+) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b48e20dd94e9..5a7b60a65f96 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 36aefc07d10c..7ad00354fb25 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 arch random implementation. * diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 992e630c227b..436865926c26 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Crypto-API module for CRC-32 algorithms implemented with the * z/Architecture Vector Extension Facility. diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 0d296662bbf0..c871b1086408 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 564616d48d8b..3b7f96c9eead 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index a4e903ed7e21..53926a2b7285 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 3e47c4a0f18b..a97a1802cfb4 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2006, 2015 * Author(s): Jan Glauber diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 10f200790079..19d344d49392 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 53c277999a28..4a31d6416d41 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 2f4caa1ef123..e22ed139d0b6 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index c740f77285b2..d359ec7fdd6b 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * -- cgit v1.2.3 From ac41aaeedc62648208eb1b32cff768a9ffcfdd23 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:35 +0100 Subject: s390: mm: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/mm/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 1 + arch/s390/mm/gmap.c | 1 + arch/s390/mm/mmap.c | 1 + arch/s390/mm/pgtable.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 2dbdcd85b68f..3596ef91cf68 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Collaborative memory management interface. * diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index b2c140193b0a..05d459b638f5 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * KVM guest address space mapping code * diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5bea139517a2..5ec9a8c2b768 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * flexible mmap layout support * diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae677f814bc0..4f2b65d01a70 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Martin Schwidefsky -- cgit v1.2.3 From adbb3901685fb94a4a0ac62f7859724964054287 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:36 +0100 Subject: s390: pci: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/pci/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Sebastian Ott Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 1 + arch/s390/pci/pci_debug.c | 1 + arch/s390/pci/pci_dma.c | 1 + arch/s390/pci/pci_insn.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 0fe649c0d542..4902fed221c0 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index c2f786f0ea06..b482e95b6249 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012,2015 * diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 0d300ee00f4e..f7aa5a77827e 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 81b840bc6e4e..19bcb3b45a70 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 specific pci instructions * -- cgit v1.2.3 From 0caa8cdf1a02a8b45dbd8641c7b3e896051c1254 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:37 +0100 Subject: s390: appldata: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/appldata/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 1 + arch/s390/appldata/appldata_mem.c | 1 + arch/s390/appldata/appldata_net_sum.c | 1 + arch/s390/appldata/appldata_os.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ef3fb1b9201f..cb6e8066b1ad 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. * Exports appldata_register_ops() and appldata_unregister_ops() for the diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 598df5708501..e68136c3c23a 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects data related to memory management. diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 66037d2622b4..8bc14b0d1def 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects accumulated network statistics (Packets received/transmitted, diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 45b3178200ab..433a994b1a89 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects misc. OS related data (CPU utilization, running processes). -- cgit v1.2.3 From 0b73214f8a4d7790efd68b9cc183d26e472ccefd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:38 +0100 Subject: s390: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the remaining arch/s390/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 1 + arch/s390/boot/install.sh | 1 + arch/s390/hypfs/inode.c | 1 + arch/s390/include/asm/cpu_mf.h | 1 + arch/s390/include/asm/kprobes.h | 1 + arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/asm/kvm_para.h | 1 + arch/s390/include/asm/livepatch.h | 1 + arch/s390/include/asm/syscall.h | 1 + arch/s390/include/asm/sysinfo.h | 1 + 10 files changed, 10 insertions(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 6b3f41985f28..ffd95861f9ac 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # s390/Makefile # diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index aed3069699bd..32a9367b9bf0 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 # # arch/s390x/boot/install.sh # diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index cf8a2d92467f..05d62cdb78b5 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Hypervisor filesystem for Linux on s390. * diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 792cda339af1..a9ba5e322bf3 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * CPU-measurement facilities * diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 921391f2341e..396b065ebb2c 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _ASM_S390_KPROBES_H #define _ASM_S390_KPROBES_H /* diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f3a9b5a445b6..66efb2979351 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for kernel virtual machines on s390 * diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 41393052ac57..2c623bf5c095 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for paravirtual devices on s390 * diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 6de5c6cb0061..d1f97fafcc20 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * livepatch.h - s390-specific Kernel Live Patching Core * diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6bc941be6921..4d6463eb7a2d 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Access to user system call parameters and results * diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index a702cb9d4269..c2137e7d9310 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for store system information stsi * -- cgit v1.2.3 From 53634237e72b4f3c2dff2f92ec35792207730a98 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:40 +0100 Subject: s390: kernel: Remove redundant license text Now that the SPDX tag is in all arch/s390/kernel/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 14 -------------- arch/s390/kernel/module.c | 14 -------------- arch/s390/kernel/perf_cpum_cf.c | 4 ---- arch/s390/kernel/perf_cpum_sf.c | 4 ---- arch/s390/kernel/perf_event.c | 4 ---- arch/s390/kernel/sthyi.c | 4 ---- arch/s390/kernel/vdso.c | 4 ---- arch/s390/kernel/vdso32/clock_getres.S | 4 ---- arch/s390/kernel/vdso32/clock_gettime.S | 4 ---- arch/s390/kernel/vdso32/gettimeofday.S | 4 ---- arch/s390/kernel/vdso64/clock_getres.S | 4 ---- arch/s390/kernel/vdso64/clock_gettime.S | 4 ---- arch/s390/kernel/vdso64/gettimeofday.S | 4 ---- 13 files changed, 72 deletions(-) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index ee055f7bdb10..af3722c28fd9 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -2,20 +2,6 @@ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * s390 port, used ppc64 as template. Mike Grundy diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 10d104589978..b7abfad4fd7d 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -9,20 +9,6 @@ * * based on i386 version * Copyright (C) 2001 Rusty Russell. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 8f6296860186..cc085e2d2ce9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2017 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "cpum_cf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 6e404caac856..1c9ddd7aa5ec 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2013 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "cpum_sf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0ef308851374..0d770e513abf 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "perf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index db163cf3606a..80b862e9c53c 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -2,10 +2,6 @@ /* * store hypervisor information instruction emulation functions. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Copyright IBM Corp. 2016 * Author(s): Janosch Frank */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 88f6fab68eb2..f3a1c7c6824e 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index eb9f22cbfd47..f61df5253c23 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index b642c0382238..2d6ec3abe095 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 8a89b54246d7..aa8bf13a2edb 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 416bf711c2aa..faf5213b15df 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index d2dee331b7cd..6046b3bfca46 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index cf62a1611054..cc9dbc27da6f 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include -- cgit v1.2.3 From 94bf2f28c9923abe7a1fccc126a86a2401cb5a47 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:41 +0100 Subject: s390: include: Remove redundant license text Now that the SPDX tag is in all arch/s390/include/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Halil Pasic Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 4 ---- arch/s390/include/asm/kprobes.h | 14 -------------- arch/s390/include/asm/kvm_host.h | 4 ---- arch/s390/include/asm/kvm_para.h | 6 ------ arch/s390/include/asm/livepatch.h | 7 ------- arch/s390/include/asm/syscall.h | 4 ---- arch/s390/include/asm/sysinfo.h | 4 ---- arch/s390/include/uapi/asm/kvm.h | 4 ---- arch/s390/include/uapi/asm/kvm_para.h | 4 ---- arch/s390/include/uapi/asm/kvm_perf.h | 4 ---- arch/s390/include/uapi/asm/virtio-ccw.h | 4 ---- arch/s390/include/uapi/asm/zcrypt.h | 14 -------------- 12 files changed, 73 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index a9ba5e322bf3..dd08db491b89 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -5,10 +5,6 @@ * Copyright IBM Corp. 2012 * Author(s): Hendrik Brueckner * Jan Glauber - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef _ASM_S390_CPU_MF_H #define _ASM_S390_CPU_MF_H diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 396b065ebb2c..13de80cf741c 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -4,20 +4,6 @@ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * 2002-Oct Created by Vamsi Krishna S Kernel diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 66efb2979351..e14f381757f6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte */ diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 2c623bf5c095..74eeec9c0a80 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger */ /* @@ -21,8 +17,6 @@ * * Copyright IBM Corp. 2007,2008 * Author(s): Christian Borntraeger - * - * This work is licensed under the terms of the GNU GPL, version 2. */ #ifndef __S390_KVM_PARA_H #define __S390_KVM_PARA_H diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index d1f97fafcc20..672f95b12d40 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -8,13 +8,6 @@ * Jiri Slaby */ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - #ifndef ASM_LIVEPATCH_H #define ASM_LIVEPATCH_H diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 4d6463eb7a2d..96f9a9151fde 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef _ASM_SYSCALL_H diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index c2137e7d9310..25057c118d56 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2001, 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Ulrich Weigand * Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 9ad172dcd912..38535a57fef8 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -6,10 +6,6 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h index 0dc86b3a7cb0..b9ab584adf43 100644 --- a/arch/s390/include/uapi/asm/kvm_para.h +++ b/arch/s390/include/uapi/asm/kvm_para.h @@ -4,9 +4,5 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h index c36c97ffdc6f..84606b8cc49e 100644 --- a/arch/s390/include/uapi/asm/kvm_perf.h +++ b/arch/s390/include/uapi/asm/kvm_perf.h @@ -4,10 +4,6 @@ * * Copyright 2014 IBM Corp. * Author(s): Alexander Yarygin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef __LINUX_KVM_PERF_S390_H diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h index 967aad390105..3a77833c74dc 100644 --- a/arch/s390/include/uapi/asm/virtio-ccw.h +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ #ifndef __KVM_VIRTIO_CCW_H diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 137ef473584e..d568307321fc 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -9,20 +9,6 @@ * Eric Rossman (edrossma@us.ibm.com) * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __ASM_S390_ZCRYPT_H -- cgit v1.2.3 From a876ca4ddef01e8737da5c672e878c67798cb975 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:42 +0100 Subject: s390: crypto: Remove redundant license text Now that the SPDX tag is in all arch/s390/crypto/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Herbert Xu Cc: "David S. Miller" Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 6 ------ arch/s390/crypto/arch_random.c | 5 ----- arch/s390/crypto/des_s390.c | 6 ------ arch/s390/crypto/paes_s390.c | 5 ----- arch/s390/crypto/sha.h | 6 ------ arch/s390/crypto/sha256_s390.c | 6 ------ arch/s390/crypto/sha512_s390.c | 6 ------ arch/s390/crypto/sha_common.c | 6 ------ 8 files changed, 46 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 5a7b60a65f96..d60798737d86 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -12,12 +12,6 @@ * Harald Freudenberger * * Derived from "crypto/aes_generic.c" - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #define KMSG_COMPONENT "aes_s390" diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 7ad00354fb25..8720e9203ecf 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -4,11 +4,6 @@ * * Copyright IBM Corp. 2017 * Author(s): Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #include diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index c871b1086408..5346b5a80bb6 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -7,12 +7,6 @@ * Copyright IBM Corp. 2003, 2011 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * */ #include diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 53926a2b7285..003932db8d12 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -8,11 +8,6 @@ * Copyright IBM Corp. 2017 * Author(s): Martin Schwidefsky * Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #define KMSG_COMPONENT "paes_s390" diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 19d344d49392..d6f8258b44df 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 4a31d6416d41..944aa6b237cd 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -7,12 +7,6 @@ * s390 Version: * Copyright IBM Corp. 2005, 2011 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include #include diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index e22ed139d0b6..b17eded532b1 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include #include diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index d359ec7fdd6b..cf0718d121bc 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include -- cgit v1.2.3 From fec37202e8a5c060e0c46cd353bf0b6d26b9cb02 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:43 +0100 Subject: s390: Remove redundant license text Now that the SPDX tag is in all arch/s390/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording in the remaining files can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 4 ---- arch/s390/boot/install.sh | 4 ---- arch/s390/hypfs/inode.c | 1 - arch/s390/mm/mmap.c | 15 --------------- 4 files changed, 24 deletions(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ffd95861f9ac..de54cfc6109d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -7,10 +7,6 @@ # for "archclean" and "archdep" for cleaning up and making dependencies for # this architecture # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1994 by Linus Torvalds # diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index 32a9367b9bf0..bed227f267ae 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -3,10 +3,6 @@ # # arch/s390x/boot/install.sh # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1995 by Linus Torvalds # # Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 05d62cdb78b5..43bbe63e2992 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 2006, 2008 * Author(s): Michael Holzheu - * License: GPL */ #define KMSG_COMPONENT "hypfs" diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5ec9a8c2b768..831bdcf407bb 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -5,21 +5,6 @@ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * * Started by Ingo Molnar */ -- cgit v1.2.3 From 345f8f34bb473241d62803951c18a844dd705f8d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Nov 2017 16:23:15 +0100 Subject: s390: revert ELF_ET_DYN_BASE base changes This reverts commit a73dc5370e153ac63718d850bddf0c9aa9d871e6. Reducing the base address for 31-bit PIE executables from (STACK_TOP/3)*2 to 4MB broke several compat programs which use -fpie to move the executable out of the lower 16MB. Cc: # 4.13+ Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 9a3cb3983c01..1a61b1b997f2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -194,13 +194,14 @@ struct arch_elf_state { #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE -/* - * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ -#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ - 0x100000000UL) +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. 64-bit + tasks are aligned to 4GB. */ +#define ELF_ET_DYN_BASE (is_compat_task() ? \ + (STACK_TOP / 3 * 2) : \ + (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ -- cgit v1.2.3 From c2cc215cde5b3fd4102c9595df66c6e4ff237be4 Mon Sep 17 00:00:00 2001 From: Cihangir Akturk Date: Fri, 11 Aug 2017 15:32:48 +0300 Subject: drm: mali-dp: switch to drm_*_get(), drm_*_put() helpers Use drm_*_get() and drm_*_put() helpers instead of drm_*_reference() and drm_*_unreference() helpers. drm_*_reference() and drm_*_unreference() functions are just compatibility alias for drm_*_get() and drm_*_put() and should not be used by new code. So convert all users of compatibility functions to use the new APIs. Generated by: scripts/coccinelle/api/drm-get-put.cocci Signed-off-by: Cihangir Akturk Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_planes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 94e7e3fa3408..f12f8eb126be 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -57,7 +57,7 @@ static void malidp_de_plane_destroy(struct drm_plane *plane) struct malidp_plane *mp = to_malidp_plane(plane); if (mp->base.fb) - drm_framebuffer_unreference(mp->base.fb); + drm_framebuffer_put(mp->base.fb); drm_plane_helper_disable(plane); drm_plane_cleanup(plane); -- cgit v1.2.3 From 0970d7a2f5b0dcdd520c7655210d677f6e9a878e Mon Sep 17 00:00:00 2001 From: Srishti Sharma Date: Fri, 29 Sep 2017 15:30:40 +0530 Subject: drm/arm: Replace instances of drm_dev_unref with drm_dev_put. Replace drm_dev_unref with drm_dev_put as it is more consistent with kernel coding style. Done using the following semantic patch by coccinelle. @r@ expression e; @@ -drm_dev_unref(); +drm_dev_put(); Signed-off-by: Srishti Sharma [split patch into hdlcd and mali-dp versions] Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index b8944666a18f..a2b698c983b4 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -671,7 +671,7 @@ query_hw_fail: malidp_runtime_pm_suspend(dev); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); alloc_fail: of_reserved_mem_device_release(dev); @@ -704,7 +704,7 @@ static void malidp_unbind(struct device *dev) malidp_runtime_pm_suspend(dev); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); of_reserved_mem_device_release(dev); } -- cgit v1.2.3 From a6993b215a719ad5758c1bced5f8df95add070bf Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 31 Aug 2017 15:48:43 +0100 Subject: drm: mali-dp: Separate static internal data into a read-only structure. The malidp_hw_device structure that the driver uses to handle the differences between versions of the IP contains both non-changeable data and fields that get updated at probe time. Previously we were copying the read-only part into allocated memory, but that can be completely avoided by splitting the structure into a read-only part and keeping the runtime modifiable fields into the old structure. Reviewed-by: Brian Starkey Reviewed-by: Gustavo Padovan Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 13 ++++---- drivers/gpu/drm/arm/malidp_drv.c | 30 ++++++++--------- drivers/gpu/drm/arm/malidp_hw.c | 46 ++++++++++++++------------ drivers/gpu/drm/arm/malidp_hw.h | 65 ++++++++++++++++++++++--------------- drivers/gpu/drm/arm/malidp_planes.c | 19 +++++------ 5 files changed, 93 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 3615d18a7ddf..153a49670626 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -65,8 +65,8 @@ static void malidp_crtc_atomic_enable(struct drm_crtc *crtc, /* We rely on firmware to set mclk to a sensible level. */ clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000); - hwdev->modeset(hwdev, &vm); - hwdev->leave_config_mode(hwdev); + hwdev->hw->modeset(hwdev, &vm); + hwdev->hw->leave_config_mode(hwdev); drm_crtc_vblank_on(crtc); } @@ -78,7 +78,8 @@ static void malidp_crtc_atomic_disable(struct drm_crtc *crtc, int err; drm_crtc_vblank_off(crtc); - hwdev->enter_config_mode(hwdev); + hwdev->hw->enter_config_mode(hwdev); + clk_disable_unprepare(hwdev->pxlclk); err = pm_runtime_put(crtc->dev->dev); @@ -319,7 +320,7 @@ static int malidp_crtc_atomic_check_scaling(struct drm_crtc *crtc, mclk_calc: drm_display_mode_to_videomode(&state->adjusted_mode, &vm); - ret = hwdev->se_calc_mclk(hwdev, s, &vm); + ret = hwdev->hw->se_calc_mclk(hwdev, s, &vm); if (ret < 0) return -EINVAL; return 0; @@ -475,7 +476,7 @@ static int malidp_crtc_enable_vblank(struct drm_crtc *crtc) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_enable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.vsync_irq); + hwdev->hw->map.de_irq_map.vsync_irq); return 0; } @@ -485,7 +486,7 @@ static void malidp_crtc_disable_vblank(struct drm_crtc *crtc) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.vsync_irq); + hwdev->hw->map.de_irq_map.vsync_irq); } static const struct drm_crtc_funcs malidp_crtc_funcs = { diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index a2b698c983b4..91f2b0191368 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -47,10 +47,10 @@ static void malidp_write_gamma_table(struct malidp_hw_device *hwdev, * directly. */ malidp_hw_write(hwdev, gamma_write_mask, - hwdev->map.coeffs_base + MALIDP_COEF_TABLE_ADDR); + hwdev->hw->map.coeffs_base + MALIDP_COEF_TABLE_ADDR); for (i = 0; i < MALIDP_COEFFTAB_NUM_COEFFS; ++i) malidp_hw_write(hwdev, data[i], - hwdev->map.coeffs_base + + hwdev->hw->map.coeffs_base + MALIDP_COEF_TABLE_DATA); } @@ -103,7 +103,7 @@ void malidp_atomic_commit_update_coloradj(struct drm_crtc *crtc, for (i = 0; i < MALIDP_COLORADJ_NUM_COEFFS; ++i) malidp_hw_write(hwdev, mc->coloradj_coeffs[i], - hwdev->map.coeffs_base + + hwdev->hw->map.coeffs_base + MALIDP_COLOR_ADJ_COEF + 4 * i); malidp_hw_setbits(hwdev, MALIDP_DISP_FUNC_CADJ, @@ -120,8 +120,8 @@ static void malidp_atomic_commit_se_config(struct drm_crtc *crtc, struct malidp_hw_device *hwdev = malidp->dev; struct malidp_se_config *s = &cs->scaler_config; struct malidp_se_config *old_s = &old_cs->scaler_config; - u32 se_control = hwdev->map.se_base + - ((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? + u32 se_control = hwdev->hw->map.se_base + + ((hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? 0x10 : 0xC); u32 layer_control = se_control + MALIDP_SE_LAYER_CONTROL; u32 scr = se_control + MALIDP_SE_SCALING_CONTROL; @@ -135,7 +135,7 @@ static void malidp_atomic_commit_se_config(struct drm_crtc *crtc, return; } - hwdev->se_set_scaling_coeffs(hwdev, s, old_s); + hwdev->hw->se_set_scaling_coeffs(hwdev, s, old_s); val = malidp_hw_read(hwdev, se_control); val |= MALIDP_SE_SCALING_EN | MALIDP_SE_ALPHA_EN; @@ -170,9 +170,9 @@ static int malidp_set_and_wait_config_valid(struct drm_device *drm) int ret; atomic_set(&malidp->config_valid, 0); - hwdev->set_config_valid(hwdev); + hwdev->hw->set_config_valid(hwdev); /* don't wait for config_valid flag if we are in config mode */ - if (hwdev->in_config_mode(hwdev)) + if (hwdev->hw->in_config_mode(hwdev)) return 0; ret = wait_event_interruptible_timeout(malidp->wq, @@ -455,7 +455,7 @@ static int malidp_runtime_pm_suspend(struct device *dev) struct malidp_hw_device *hwdev = malidp->dev; /* we can only suspend if the hardware is in config mode */ - WARN_ON(!hwdev->in_config_mode(hwdev)); + WARN_ON(!hwdev->hw->in_config_mode(hwdev)); hwdev->pm_suspended = true; clk_disable_unprepare(hwdev->mclk); @@ -500,11 +500,7 @@ static int malidp_bind(struct device *dev) if (!hwdev) return -ENOMEM; - /* - * copy the associated data from malidp_drm_of_match to avoid - * having to keep a reference to the OF node after binding - */ - memcpy(hwdev, of_device_get_match_data(dev), sizeof(*hwdev)); + hwdev->hw = (struct malidp_hw *)of_device_get_match_data(dev); malidp->dev = hwdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -568,13 +564,13 @@ static int malidp_bind(struct device *dev) goto query_hw_fail; } - ret = hwdev->query_hw(hwdev); + ret = hwdev->hw->query_hw(hwdev); if (ret) { DRM_ERROR("Invalid HW configuration\n"); goto query_hw_fail; } - version = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_DE_CORE_ID); + version = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_DE_CORE_ID); DRM_INFO("found ARM Mali-DP%3x version r%dp%d\n", version >> 16, (version >> 12) & 0xf, (version >> 8) & 0xf); @@ -589,7 +585,7 @@ static int malidp_bind(struct device *dev) for (i = 0; i < MAX_OUTPUT_CHANNELS; i++) out_depth = (out_depth << 8) | (output_width[i] & 0xf); - malidp_hw_write(hwdev, out_depth, hwdev->map.out_depth_base); + malidp_hw_write(hwdev, out_depth, hwdev->hw->map.out_depth_base); atomic_set(&malidp->config_valid, 0); init_waitqueue_head(&malidp->wq); diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 17bca99e8ac8..2bfb542135ac 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -183,7 +183,7 @@ static void malidp500_enter_config_mode(struct malidp_hw_device *hwdev) malidp_hw_setbits(hwdev, MALIDP500_DC_CONFIG_REQ, MALIDP500_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == MALIDP500_DC_CONFIG_REQ) break; /* @@ -203,7 +203,7 @@ static void malidp500_leave_config_mode(struct malidp_hw_device *hwdev) malidp_hw_clearbits(hwdev, MALIDP_CFG_VALID, MALIDP500_CONFIG_VALID); malidp_hw_clearbits(hwdev, MALIDP500_DC_CONFIG_REQ, MALIDP500_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == 0) break; usleep_range(100, 1000); @@ -216,7 +216,7 @@ static bool malidp500_in_config_mode(struct malidp_hw_device *hwdev) { u32 status; - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == MALIDP500_DC_CONFIG_REQ) return true; @@ -407,7 +407,7 @@ static void malidp550_enter_config_mode(struct malidp_hw_device *hwdev) malidp_hw_setbits(hwdev, MALIDP550_DC_CONFIG_REQ, MALIDP550_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == MALIDP550_DC_CONFIG_REQ) break; /* @@ -427,7 +427,7 @@ static void malidp550_leave_config_mode(struct malidp_hw_device *hwdev) malidp_hw_clearbits(hwdev, MALIDP_CFG_VALID, MALIDP550_CONFIG_VALID); malidp_hw_clearbits(hwdev, MALIDP550_DC_CONFIG_REQ, MALIDP550_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == 0) break; usleep_range(100, 1000); @@ -440,7 +440,7 @@ static bool malidp550_in_config_mode(struct malidp_hw_device *hwdev) { u32 status; - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == MALIDP550_DC_CONFIG_REQ) return true; @@ -616,7 +616,7 @@ static int malidp650_query_hw(struct malidp_hw_device *hwdev) return 0; } -const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = { +const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = { [MALIDP_500] = { .map = { .coeffs_base = MALIDP500_COEFFS_BASE, @@ -751,7 +751,7 @@ static void malidp_hw_clear_irq(struct malidp_hw_device *hwdev, u8 block, u32 ir { u32 base = malidp_get_block_base(hwdev, block); - if (hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) + if (hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) malidp_hw_write(hwdev, irq, base + MALIDP_REG_CLEARIRQ); else malidp_hw_write(hwdev, irq, base + MALIDP_REG_STATUS); @@ -762,12 +762,14 @@ static irqreturn_t malidp_de_irq(int irq, void *arg) struct drm_device *drm = arg; struct malidp_drm *malidp = drm->dev_private; struct malidp_hw_device *hwdev; + struct malidp_hw *hw; const struct malidp_irq_map *de; u32 status, mask, dc_status; irqreturn_t ret = IRQ_NONE; hwdev = malidp->dev; - de = &hwdev->map.de_irq_map; + hw = hwdev->hw; + de = &hw->map.de_irq_map; /* * if we are suspended it is likely that we were invoked because @@ -778,8 +780,8 @@ static irqreturn_t malidp_de_irq(int irq, void *arg) return IRQ_NONE; /* first handle the config valid IRQ */ - dc_status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); - if (dc_status & hwdev->map.dc_irq_map.vsync_irq) { + dc_status = malidp_hw_read(hwdev, hw->map.dc_base + MALIDP_REG_STATUS); + if (dc_status & hw->map.dc_irq_map.vsync_irq) { /* we have a page flip event */ atomic_set(&malidp->config_valid, 1); malidp_hw_clear_irq(hwdev, MALIDP_DC_BLOCK, dc_status); @@ -832,11 +834,11 @@ int malidp_de_irq_init(struct drm_device *drm, int irq) /* first enable the DC block IRQs */ malidp_hw_enable_irq(hwdev, MALIDP_DC_BLOCK, - hwdev->map.dc_irq_map.irq_mask); + hwdev->hw->map.dc_irq_map.irq_mask); /* now enable the DE block IRQs */ malidp_hw_enable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.irq_mask); + hwdev->hw->map.de_irq_map.irq_mask); return 0; } @@ -847,9 +849,9 @@ void malidp_de_irq_fini(struct drm_device *drm) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.irq_mask); + hwdev->hw->map.de_irq_map.irq_mask); malidp_hw_disable_irq(hwdev, MALIDP_DC_BLOCK, - hwdev->map.dc_irq_map.irq_mask); + hwdev->hw->map.dc_irq_map.irq_mask); } static irqreturn_t malidp_se_irq(int irq, void *arg) @@ -857,6 +859,8 @@ static irqreturn_t malidp_se_irq(int irq, void *arg) struct drm_device *drm = arg; struct malidp_drm *malidp = drm->dev_private; struct malidp_hw_device *hwdev = malidp->dev; + struct malidp_hw *hw = hwdev->hw; + const struct malidp_irq_map *se = &hw->map.se_irq_map; u32 status, mask; /* @@ -867,12 +871,12 @@ static irqreturn_t malidp_se_irq(int irq, void *arg) if (hwdev->pm_suspended) return IRQ_NONE; - status = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_STATUS); - if (!(status & hwdev->map.se_irq_map.irq_mask)) + status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); + if (!(status & se->irq_mask)) return IRQ_NONE; - mask = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_MASKIRQ); - status = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_STATUS); + mask = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_MASKIRQ); + status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); status &= mask; /* ToDo: status decoding and firing up of VSYNC and page flip events */ @@ -905,7 +909,7 @@ int malidp_se_irq_init(struct drm_device *drm, int irq) } malidp_hw_enable_irq(hwdev, MALIDP_SE_BLOCK, - hwdev->map.se_irq_map.irq_mask); + hwdev->hw->map.se_irq_map.irq_mask); return 0; } @@ -916,5 +920,5 @@ void malidp_se_irq_fini(struct drm_device *drm) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_SE_BLOCK, - hwdev->map.se_irq_map.irq_mask); + hwdev->hw->map.se_irq_map.irq_mask); } diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h index 849ad9a30c3a..b0690ebb3565 100644 --- a/drivers/gpu/drm/arm/malidp_hw.h +++ b/drivers/gpu/drm/arm/malidp_hw.h @@ -120,18 +120,14 @@ struct malidp_hw_regmap { /* Unlike DP550/650, DP500 has 3 stride registers in its video layer. */ #define MALIDP_DEVICE_LV_HAS_3_STRIDES BIT(0) -struct malidp_hw_device { - const struct malidp_hw_regmap map; - void __iomem *regs; +struct malidp_hw_device; - /* APB clock */ - struct clk *pclk; - /* AXI clock */ - struct clk *aclk; - /* main clock for display core */ - struct clk *mclk; - /* pixel clock for display core */ - struct clk *pxlclk; +/* + * Static structure containing hardware specific data and pointers to + * functions that behave differently between various versions of the IP. + */ +struct malidp_hw { + const struct malidp_hw_regmap map; /* * Validate the driver instance against the hardware bits @@ -182,15 +178,6 @@ struct malidp_hw_device { struct videomode *vm); u8 features; - - u8 min_line_size; - u16 max_line_size; - - /* track the device PM state */ - bool pm_suspended; - - /* size of memory used for rotating layers, up to two banks available */ - u32 rotation_memory[2]; }; /* Supported variants of the hardware */ @@ -202,7 +189,33 @@ enum { MALIDP_MAX_DEVICES }; -extern const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES]; +extern const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES]; + +/* + * Structure used by the driver during runtime operation. + */ +struct malidp_hw_device { + struct malidp_hw *hw; + void __iomem *regs; + + /* APB clock */ + struct clk *pclk; + /* AXI clock */ + struct clk *aclk; + /* main clock for display core */ + struct clk *mclk; + /* pixel clock for display core */ + struct clk *pxlclk; + + u8 min_line_size; + u16 max_line_size; + + /* track the device PM state */ + bool pm_suspended; + + /* size of memory used for rotating layers, up to two banks available */ + u32 rotation_memory[2]; +}; static inline u32 malidp_hw_read(struct malidp_hw_device *hwdev, u32 reg) { @@ -240,9 +253,9 @@ static inline u32 malidp_get_block_base(struct malidp_hw_device *hwdev, { switch (block) { case MALIDP_SE_BLOCK: - return hwdev->map.se_base; + return hwdev->hw->map.se_base; case MALIDP_DC_BLOCK: - return hwdev->map.dc_base; + return hwdev->hw->map.dc_base; } return 0; @@ -275,7 +288,7 @@ u8 malidp_hw_get_format_id(const struct malidp_hw_regmap *map, static inline bool malidp_hw_pitch_valid(struct malidp_hw_device *hwdev, unsigned int pitch) { - return !(pitch & (hwdev->map.bus_align_bytes - 1)); + return !(pitch & (hwdev->hw->map.bus_align_bytes - 1)); } /* U16.16 */ @@ -308,8 +321,8 @@ static inline void malidp_se_set_enh_coeffs(struct malidp_hw_device *hwdev) }; u32 val = MALIDP_SE_SET_ENH_LIMIT_LOW(MALIDP_SE_ENH_LOW_LEVEL) | MALIDP_SE_SET_ENH_LIMIT_HIGH(MALIDP_SE_ENH_HIGH_LEVEL); - u32 image_enh = hwdev->map.se_base + - ((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? + u32 image_enh = hwdev->hw->map.se_base + + ((hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? 0x10 : 0xC) + MALIDP_SE_IMAGE_ENH; u32 enh_coeffs = image_enh + MALIDP_SE_ENH_COEFF0; int i; diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index f12f8eb126be..e7419797bbd1 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -185,8 +185,9 @@ static int malidp_de_plane_check(struct drm_plane *plane, fb = state->fb; - ms->format = malidp_hw_get_format_id(&mp->hwdev->map, mp->layer->id, - fb->format->format); + ms->format = malidp_hw_get_format_id(&mp->hwdev->hw->map, + mp->layer->id, + fb->format->format); if (ms->format == MALIDP_INVALID_FORMAT_ID) return -EINVAL; @@ -211,7 +212,7 @@ static int malidp_de_plane_check(struct drm_plane *plane, * third plane stride register. */ if (ms->n_planes == 3 && - !(mp->hwdev->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) && + !(mp->hwdev->hw->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) && (state->fb->pitches[1] != state->fb->pitches[2])) return -EINVAL; @@ -229,9 +230,9 @@ static int malidp_de_plane_check(struct drm_plane *plane, if (state->rotation & MALIDP_ROTATED_MASK) { int val; - val = mp->hwdev->rotmem_required(mp->hwdev, state->crtc_h, - state->crtc_w, - fb->format->format); + val = mp->hwdev->hw->rotmem_required(mp->hwdev, state->crtc_h, + state->crtc_w, + fb->format->format); if (val < 0) return val; @@ -251,7 +252,7 @@ static void malidp_de_set_plane_pitches(struct malidp_plane *mp, return; if (num_planes == 3) - num_strides = (mp->hwdev->features & + num_strides = (mp->hwdev->hw->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) ? 3 : 2; for (i = 0; i < num_strides; ++i) @@ -264,13 +265,11 @@ static void malidp_de_plane_update(struct drm_plane *plane, struct drm_plane_state *old_state) { struct malidp_plane *mp; - const struct malidp_hw_regmap *map; struct malidp_plane_state *ms = to_malidp_plane_state(plane->state); u32 src_w, src_h, dest_w, dest_h, val; int i; mp = to_malidp_plane(plane); - map = &mp->hwdev->map; /* convert src values from Q16 fixed point to integer */ src_w = plane->state->src_w >> 16; @@ -363,7 +362,7 @@ static const struct drm_plane_helper_funcs malidp_de_plane_helper_funcs = { int malidp_de_planes_init(struct drm_device *drm) { struct malidp_drm *malidp = drm->dev_private; - const struct malidp_hw_regmap *map = &malidp->dev->map; + const struct malidp_hw_regmap *map = &malidp->dev->hw->map; struct malidp_plane *plane = NULL; enum drm_plane_type plane_type; unsigned long crtcs = 1 << drm->mode_config.num_crtc; -- cgit v1.2.3 From 54243016ae35a0912a680f884835237fd6176820 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 31 Aug 2017 17:39:24 +0100 Subject: drm: mali-dp: Disable planes when their CRTC gets disabled. Make sure only the planes on the active CRTCs get committed and that all planes on the disabled CRTCs get turned off. Reviewed-by: Brian Starkey Reviewed-by: Gustavo Padovan Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 153a49670626..904fff80917b 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -77,6 +77,9 @@ static void malidp_crtc_atomic_disable(struct drm_crtc *crtc, struct malidp_hw_device *hwdev = malidp->dev; int err; + /* always disable planes on the CRTC that is being turned off */ + drm_atomic_helper_disable_planes_on_crtc(old_state, false); + drm_crtc_vblank_off(crtc); hwdev->hw->enter_config_mode(hwdev); -- cgit v1.2.3 From 26c0a26d78bc7c2943d55121a32cb85a4594f8ea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Nov 2017 10:12:33 -0700 Subject: nvme-fc: don't use bit masks for set/test_bit() numbers So far harmless, but it's confusing and a bug waiting to happen if the shifts grow larger than 4. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e0577bf33f45..0a8af4daef89 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -31,8 +31,8 @@ enum nvme_fc_queue_flags { - NVME_FC_Q_CONNECTED = (1 << 0), - NVME_FC_Q_LIVE = (1 << 1), + NVME_FC_Q_CONNECTED = 0, + NVME_FC_Q_LIVE, }; #define NVMEFC_QUEUE_DELAY 3 /* ms units */ -- cgit v1.2.3 From ca43a0c73d1d32e2c24698b1b9044557a0b197d4 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 19 Nov 2017 22:21:08 -0500 Subject: forcedeth: replace pci_unmap_page with dma_unmap_page The function pci_unmap_page is obsolete. So it is replaced with the function dma_unmap_page. CC: Srinivas Eeda CC: Joe Jin CC: Junxiao Bi Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index ac8439ceea10..481876b5424c 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1986,9 +1986,9 @@ static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb) tx_skb->dma_len, DMA_TO_DEVICE); else - pci_unmap_page(np->pci_dev, tx_skb->dma, + dma_unmap_page(&np->pci_dev->dev, tx_skb->dma, tx_skb->dma_len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); tx_skb->dma = 0; } } -- cgit v1.2.3 From 01a95b2141e337dea15ad48e60a35c72a9b10205 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2017 09:35:25 +0100 Subject: cfg80211: select CRYPTO_SHA256 if needed When regulatory database certificates are built-in, they're currently using the SHA256 digest algorithm, so add that to the build in that case. Also add a note that for custom certificates, one may need to add the right algorithms. Reported-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Johannes Berg --- net/wireless/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index da91bb547db3..1abcc4fc4df1 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -20,6 +20,10 @@ config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL select FW_LOADER + # may need to update this when certificates are changed and are + # using a different algorithm, though right now they shouldn't + # (this is here rather than below to allow it to be a module) + select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS ---help--- cfg80211 is the Linux wireless LAN (802.11) configuration API. Enable this if you have a wireless device. @@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR certificates like in the kernel sources (net/wireless/certs/) that shall be accepted for a signed regulatory database. + Note that you need to also select the correct CRYPTO_ modules + for your certificates, and if cfg80211 is built-in they also must be. + config CFG80211_REG_CELLULAR_HINTS bool "cfg80211 regulatory support for cellular base station hints" depends on CFG80211_CERTIFICATION_ONUS -- cgit v1.2.3 From bb22cafd75686d799dabfe422571fac4b5c2ed94 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Fri, 24 Nov 2017 15:14:24 -0800 Subject: bcache: add a comment in journal bucket reading Journal bucket is a circular buffer, the bucket can be like YYYNNNYY, which means the first valid journal in the 7th bucket, and the latest valid journal in third bucket, in this case, if we do not try we the zero index first, We may get a valid journal in the 7th bucket, then we call find_next_bit(bitmap,ca->sb.njournal_buckets, l + 1) to get the first invalid bucket after the 7th bucket, because all these buckets is valid, so no bit 1 in bitmap, thus find_next_bit() function would return with ca->sb.njournal_buckets (8). So, after that, bcache only read journal in 7th and 8the bucket, the first to the third buckets are lost. So, it is important to let developer know that, we need to try the zero index at first in the hash-search, and avoid any breaks in future's code modification. [ML: Fixed whitespace & formatting & file permissions] Signed-off-by: Tang Junhui Signed-off-by: Michael Lyle Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/journal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 02a98ddb592d..5018c56ebb67 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -170,6 +170,11 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) * find a sequence of buckets with valid journal entries */ for (i = 0; i < ca->sb.njournal_buckets; i++) { + /* + * We must try the index l with ZERO first for + * correctness due to the scenario that the journal + * bucket is circular buffer which might have wrapped + */ l = (i * 2654435769U) % ca->sb.njournal_buckets; if (test_bit(l, bitmap)) -- cgit v1.2.3 From cf33c1ee5254c6a430bc1538232b49c3ea13e613 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 24 Nov 2017 15:14:25 -0800 Subject: bcache: Fix building error on MIPS This patch try to fix the building error on MIPS. The reason is MIPS has already defined the PTR macro, which conflicts with the PTR macro in include/uapi/linux/bcache.h. [fixed by mlyle: corrected a line-length issue] Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Reviewed-by: Michael Lyle Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 2 +- drivers/md/bcache/extents.c | 2 +- drivers/md/bcache/journal.c | 2 +- include/uapi/linux/bcache.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index a27d85232ce1..a0cc1bc6d884 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -490,7 +490,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, if (b == -1) goto err; - k->ptr[i] = PTR(ca->buckets[b].gen, + k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, bucket_to_sector(c, b), ca->sb.nr_this_dev); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 41c238fc3733..f9d391711595 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -585,7 +585,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey return false; for (i = 0; i < KEY_PTRS(l); i++) - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) return false; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 5018c56ebb67..a87165c1d8e5 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -512,7 +512,7 @@ static void journal_reclaim(struct cache_set *c) continue; ja->cur_idx = next; - k->ptr[n++] = PTR(0, + k->ptr[n++] = MAKE_PTR(0, bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ca->sb.nr_this_dev); } diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 90fc490f973f..821f71a2e48f 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -91,7 +91,7 @@ PTR_FIELD(PTR_GEN, 0, 8) #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) -#define PTR(gen, offset, dev) \ +#define MAKE_PTR(gen, offset, dev) \ ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) /* Bkey utility code */ -- cgit v1.2.3 From e393aa2446150536929140739f09c6ecbcbea7f0 Mon Sep 17 00:00:00 2001 From: Rui Hua Date: Fri, 24 Nov 2017 15:14:26 -0800 Subject: bcache: recover data from backing when data is clean When we send a read request and hit the clean data in cache device, there is a situation called cache read race in bcache(see the commit in the tail of cache_look_up(), the following explaination just copy from there): The bucket we're reading from might be reused while our bio is in flight, and we could then end up reading the wrong data. We guard against this by checking (in bch_cache_read_endio()) if the pointer is stale again; if so, we treat it as an error (s->iop.error = -EINTR) and reread from the backing device (but we don't pass that error up anywhere) It should be noted that cache read race happened under normal circumstances, not the circumstance when SSD failed, it was counted and shown in /sys/fs/bcache/XXX/internal/cache_read_races. Without this patch, when we use writeback mode, we will never reread from the backing device when cache read race happened, until the whole cache device is clean, because the condition (s->recoverable && (dc && !atomic_read(&dc->has_dirty))) is false in cached_dev_read_error(). In this situation, the s->iop.error(= -EINTR) will be passed up, at last, user will receive -EINTR when it's bio end, this is not suitable, and wield to up-application. In this patch, we use s->read_dirty_data to judge whether the read request hit dirty data in cache device, it is safe to reread data from the backing device when the read request hit clean data. This can not only handle cache read race, but also recover data when failed read request from cache device. [edited by mlyle to fix up whitespace, commit log title, comment spelling] Fixes: d59b23795933 ("bcache: only permit to recovery read error when cache device is clean") Cc: # 4.14 Signed-off-by: Hua Rui Reviewed-by: Michael Lyle Reviewed-by: Coly Li Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3a7aed7282b2..643c3021624f 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -708,16 +708,15 @@ static void cached_dev_read_error(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); /* - * If cache device is dirty (dc->has_dirty is non-zero), then - * recovery a failed read request from cached device may get a - * stale data back. So read failure recovery is only permitted - * when cache device is clean. + * If read request hit dirty data (s->read_dirty_data is true), + * then recovery a failed read request from cached device may + * get a stale data back. So read failure recovery is only + * permitted when read request hit clean data in cache device, + * or when cache read race happened. */ - if (s->recoverable && - (dc && !atomic_read(&dc->has_dirty))) { + if (s->recoverable && !s->read_dirty_data) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); -- cgit v1.2.3 From 6c4ca1e36cdc1a0a7a84797804b87920ccbebf51 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Fri, 24 Nov 2017 15:14:27 -0800 Subject: bcache: check return value of register_shrinker register_shrinker is now __must_check, so check it to kill a warning. Caller of bch_btree_cache_alloc in super.c appropriately checks return value so this is fully plumbed through. This V2 fixes checkpatch warnings and improves the commit description, as I was too hasty getting the previous version out. Signed-off-by: Michael Lyle Reviewed-by: Vojtech Pavlik Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 11c5503d31dc..81e8dc3dbe5e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -807,7 +807,10 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; - register_shrinker(&c->shrink); + + if (register_shrinker(&c->shrink)) + pr_warn("bcache: %s: could not register shrinker", + __func__); return 0; } -- cgit v1.2.3 From c14ca8386539a298c1c19b003fe55e37d0f0e89c Mon Sep 17 00:00:00 2001 From: Ondrej Mosnáček Date: Thu, 23 Nov 2017 13:49:06 +0100 Subject: crypto: skcipher - Fix skcipher_walk_aead_common The skcipher_walk_aead_common function calls scatterwalk_copychunks on the input and output walks to skip the associated data. If the AD end at an SG list entry boundary, then after these calls the walks will still be pointing to the end of the skipped region. These offsets are later checked for alignment in skcipher_walk_next, so the skcipher_walk may detect the alignment incorrectly. This patch fixes it by calling scatterwalk_done after the copychunks calls to ensure that the offsets refer to the right SG list entry. Fixes: b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") Cc: Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/skcipher.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 4faa0fd53b0c..6c45ed536664 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -517,6 +517,9 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2); scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2); + scatterwalk_done(&walk->in, 0, walk->total); + scatterwalk_done(&walk->out, 0, walk->total); + walk->iv = req->iv; walk->oiv = req->iv; -- cgit v1.2.3 From fa6d7cb5d76cf0467c61420fc9238045aedfd379 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Thu, 23 Nov 2017 22:34:31 +0300 Subject: net: thunderx: Fix TCP/UDP checksum offload for IPv6 pkts Don't offload IP header checksum to NIC. This fixes a previous patch which enabled checksum offloading for both IPv4 and IPv6 packets. So L3 checksum offload was getting enabled for IPv6 pkts. And HW is dropping these pkts as it assumes the pkt is IPv4 when IP csum offload is set in the SQ descriptor. Fixes: 3a9024f52c2e ("net: thunderx: Enable TSO and checksum offloads for ipv6") Signed-off-by: Sunil Goutham Signed-off-by: Aleksey Makarov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index d4496e9afcdf..8b2c31e2a2b0 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1355,7 +1355,6 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, /* Offload checksum calculation to HW */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - hdr->csum_l3 = 1; /* Enable IP csum calculation */ hdr->l3_offset = skb_network_offset(skb); hdr->l4_offset = skb_transport_offset(skb); -- cgit v1.2.3 From 540c11159dcece5c4a8157a7b39336316085470f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 24 Nov 2017 14:05:36 +0300 Subject: net: thunderbolt: Stop using zero to mean no valid DMA mapping Commit 86dabda426ac ("net: thunderbolt: Clear finished Tx frame bus address in tbnet_tx_callback()") fixed a DMA-API violation where the driver called dma_unmap_page() in tbnet_free_buffers() for a bus address that might already be unmapped. The fix was to zero out the bus address of a frame in tbnet_tx_callback(). However, as pointed out by David Miller, zero might well be valid mapping (at least in theory) so it is not good idea to use it here. It turns out that we don't need the whole map/unmap dance for Tx buffers at all. Instead we can map the buffers when they are initially allocated and unmap them when the interface is brought down. In between we just DMA sync the buffers for the CPU or device as needed. Signed-off-by: Mika Westerberg Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 57 ++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 228d4aa6d9ae..ca5e375de27c 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -335,7 +335,7 @@ static void tbnet_free_buffers(struct tbnet_ring *ring) if (ring->ring->is_tx) { dir = DMA_TO_DEVICE; order = 0; - size = tbnet_frame_size(tf); + size = TBNET_FRAME_SIZE; } else { dir = DMA_FROM_DEVICE; order = TBNET_RX_PAGE_ORDER; @@ -512,6 +512,7 @@ err_free: static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net) { struct tbnet_ring *ring = &net->tx_ring; + struct device *dma_dev = tb_ring_dma_device(ring->ring); struct tbnet_frame *tf; unsigned int index; @@ -522,7 +523,9 @@ static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net) tf = &ring->frames[index]; tf->frame.size = 0; - tf->frame.buffer_phy = 0; + + dma_sync_single_for_cpu(dma_dev, tf->frame.buffer_phy, + tbnet_frame_size(tf), DMA_TO_DEVICE); return tf; } @@ -531,13 +534,8 @@ static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, bool canceled) { struct tbnet_frame *tf = container_of(frame, typeof(*tf), frame); - struct device *dma_dev = tb_ring_dma_device(ring); struct tbnet *net = netdev_priv(tf->dev); - dma_unmap_page(dma_dev, tf->frame.buffer_phy, tbnet_frame_size(tf), - DMA_TO_DEVICE); - tf->frame.buffer_phy = 0; - /* Return buffer to the ring */ net->tx_ring.prod++; @@ -548,10 +546,12 @@ static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, static int tbnet_alloc_tx_buffers(struct tbnet *net) { struct tbnet_ring *ring = &net->tx_ring; + struct device *dma_dev = tb_ring_dma_device(ring->ring); unsigned int i; for (i = 0; i < TBNET_RING_SIZE; i++) { struct tbnet_frame *tf = &ring->frames[i]; + dma_addr_t dma_addr; tf->page = alloc_page(GFP_KERNEL); if (!tf->page) { @@ -559,7 +559,17 @@ static int tbnet_alloc_tx_buffers(struct tbnet *net) return -ENOMEM; } + dma_addr = dma_map_page(dma_dev, tf->page, 0, TBNET_FRAME_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) { + __free_page(tf->page); + tf->page = NULL; + tbnet_free_buffers(ring); + return -ENOMEM; + } + tf->dev = net->dev; + tf->frame.buffer_phy = dma_addr; tf->frame.callback = tbnet_tx_callback; tf->frame.sof = TBIP_PDF_FRAME_START; tf->frame.eof = TBIP_PDF_FRAME_END; @@ -881,19 +891,6 @@ static int tbnet_stop(struct net_device *dev) return 0; } -static bool tbnet_xmit_map(struct device *dma_dev, struct tbnet_frame *tf) -{ - dma_addr_t dma_addr; - - dma_addr = dma_map_page(dma_dev, tf->page, 0, tbnet_frame_size(tf), - DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, dma_addr)) - return false; - - tf->frame.buffer_phy = dma_addr; - return true; -} - static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, struct tbnet_frame **frames, u32 frame_count) { @@ -908,13 +905,14 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { /* No need to calculate checksum so we just update the - * total frame count and map the frames for DMA. + * total frame count and sync the frames for DMA. */ for (i = 0; i < frame_count; i++) { hdr = page_address(frames[i]->page); hdr->frame_count = cpu_to_le32(frame_count); - if (!tbnet_xmit_map(dma_dev, frames[i])) - goto err_unmap; + dma_sync_single_for_device(dma_dev, + frames[i]->frame.buffer_phy, + tbnet_frame_size(frames[i]), DMA_TO_DEVICE); } return true; @@ -983,21 +981,14 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, *tucso = csum_fold(wsum); /* Checksum is finally calculated and we don't touch the memory - * anymore, so DMA map the frames now. + * anymore, so DMA sync the frames now. */ for (i = 0; i < frame_count; i++) { - if (!tbnet_xmit_map(dma_dev, frames[i])) - goto err_unmap; + dma_sync_single_for_device(dma_dev, frames[i]->frame.buffer_phy, + tbnet_frame_size(frames[i]), DMA_TO_DEVICE); } return true; - -err_unmap: - while (i--) - dma_unmap_page(dma_dev, frames[i]->frame.buffer_phy, - tbnet_frame_size(frames[i]), DMA_TO_DEVICE); - - return false; } static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num, -- cgit v1.2.3 From a60b3f515d30d0fe8537c64671926879a3548103 Mon Sep 17 00:00:00 2001 From: Roman Kapl Date: Fri, 24 Nov 2017 12:27:58 +0100 Subject: net: sched: crash on blocks with goto chain action tcf_block_put_ext has assumed that all filters (and thus their goto actions) are destroyed in RCU callback and thus can not race with our list iteration. However, that is not true during netns cleanup (see tcf_exts_get_net comment). Prevent the user after free by holding all chains (except 0, that one is already held). foreach_safe is not enough in this case. To reproduce, run the following in a netns and then delete the ns: ip link add dtest type dummy tc qdisc add dev dtest ingress tc filter add dev dtest chain 1 parent ffff: handle 1 prio 1 flower action goto chain 2 Fixes: 822e86d997 ("net_sched: remove tcf_block_put_deferred()") Signed-off-by: Roman Kapl Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7d97f612c9b9..ddcf04b4ab43 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -336,7 +336,8 @@ static void tcf_block_put_final(struct work_struct *work) struct tcf_chain *chain, *tmp; rtnl_lock(); - /* Only chain 0 should be still here. */ + + /* At this point, all the chains should have refcnt == 1. */ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) tcf_chain_put(chain); rtnl_unlock(); @@ -344,15 +345,21 @@ static void tcf_block_put_final(struct work_struct *work) } /* XXX: Standalone actions are not allowed to jump to any chain, and bound - * actions should be all removed after flushing. However, filters are now - * destroyed in tc filter workqueue with RTNL lock, they can not race here. + * actions should be all removed after flushing. */ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain, *tmp; + struct tcf_chain *chain; - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + /* Hold a refcnt for all chains, except 0, so that they don't disappear + * while we are iterating. + */ + list_for_each_entry(chain, &block->chain_list, list) + if (chain->index) + tcf_chain_hold(chain); + + list_for_each_entry(chain, &block->chain_list, list) tcf_chain_flush(chain); tcf_block_offload_unbind(block, q, ei); -- cgit v1.2.3 From afbea2cd253b5198350dfd8edb963567d05827d6 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Fri, 24 Nov 2017 06:25:28 -0800 Subject: VSOCK: Don't call vsock_stream_has_data in atomic context When using the host personality, VMCI will grab a mutex for any queue pair access. In the detach callback for the vmci vsock transport, we call vsock_stream_has_data while holding a spinlock, and vsock_stream_has_data will access a queue pair. To avoid this, we can simply omit calling vsock_stream_has_data for host side queue pairs, since the QPs are empty per default when the guest has detached. This bug affects users of VMware Workstation using kernel version 4.4 and later. Testing: Ran vsock tests between guest and host, and verified that with this change, the host isn't calling vsock_stream_has_data during detach. Ran mixedTest between guest and host using both guest and host as server. v2: Rebased on top of recent change to sk_state values Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Stefan Hajnoczi Signed-off-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 391775e3575c..56573dc85709 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -797,9 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk) /* We should not be sending anymore since the peer won't be * there to receive, but we can still receive if there is data - * left in our consume queue. + * left in our consume queue. If the local endpoint is a host, + * we can't call vsock_stream_has_data, since that may block, + * but a host endpoint can't read data once the VM has + * detached, so there is no available data in that case. */ - if (vsock_stream_has_data(vsk) <= 0) { + if (vsk->local_addr.svm_cid == VMADDR_CID_HOST || + vsock_stream_has_data(vsk) <= 0) { sk->sk_state = TCP_CLOSE; if (sk->sk_state == TCP_SYN_SENT) { @@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); -MODULE_VERSION("1.0.4.0-k"); +MODULE_VERSION("1.0.5.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); -- cgit v1.2.3 From 9e741045faea17e28663f14a45f7f3304827c968 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 24 Nov 2017 11:36:06 -0500 Subject: net: dsa: fix 'increment on 0' warning Setting the refcount to 0 when allocating a tree to match the number of switch devices it holds may cause an 'increment on 0; use-after-free', if CONFIG_REFCOUNT_FULL is enabled. To fix this, do not decrement the refcount of a newly allocated tree, increment it when an already allocated tree is found, and decrement it after the probing of a switch, as done with the previous behavior. At the same time, make dsa_tree_get and dsa_tree_put accept a NULL argument to simplify callers, and return the tree after incrementation, as most kref users like of_node_get and of_node_put do. Fixes: 8e5bf9759a06 ("net: dsa: simplify tree reference counting") Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 44e3fb7dec8c..1e287420ff49 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) INIT_LIST_HEAD(&dst->list); list_add_tail(&dsa_tree_list, &dst->list); - /* Initialize the reference counter to the number of switches, not 1 */ kref_init(&dst->refcount); - refcount_set(&dst->refcount.refcount, 0); return dst; } @@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst) kfree(dst); } -static struct dsa_switch_tree *dsa_tree_touch(int index) +static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst) { - struct dsa_switch_tree *dst; - - dst = dsa_tree_find(index); - if (!dst) - dst = dsa_tree_alloc(index); + if (dst) + kref_get(&dst->refcount); return dst; } -static void dsa_tree_get(struct dsa_switch_tree *dst) +static struct dsa_switch_tree *dsa_tree_touch(int index) { - kref_get(&dst->refcount); + struct dsa_switch_tree *dst; + + dst = dsa_tree_find(index); + if (dst) + return dsa_tree_get(dst); + else + return dsa_tree_alloc(index); } static void dsa_tree_release(struct kref *ref) @@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref) static void dsa_tree_put(struct dsa_switch_tree *dst) { - kref_put(&dst->refcount, dsa_tree_release); + if (dst) + kref_put(&dst->refcount, dsa_tree_release); } static bool dsa_port_is_dsa(struct dsa_port *port) @@ -765,6 +767,7 @@ int dsa_register_switch(struct dsa_switch *ds) mutex_lock(&dsa2_mutex); err = dsa_switch_probe(ds); + dsa_tree_put(ds->dst); mutex_unlock(&dsa2_mutex); return err; -- cgit v1.2.3 From 7bbefcfac1936c8d9082a828b09f42a3839cb06e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 24 Nov 2017 12:08:40 -0800 Subject: uapi: add SPDX identifier to vm_sockets_diag.h New file seems to have missed the SPDX license scan and update. Signed-off-by: Stephen Hemminger Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/uapi/linux/vm_sockets_diag.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/vm_sockets_diag.h b/include/uapi/linux/vm_sockets_diag.h index 14cd7dc5a187..0b4dd54f3d1e 100644 --- a/include/uapi/linux/vm_sockets_diag.h +++ b/include/uapi/linux/vm_sockets_diag.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* AF_VSOCK sock_diag(7) interface for querying open sockets */ #ifndef _UAPI__VM_SOCKETS_DIAG_H__ -- cgit v1.2.3 From cf21654b40968609779751b34e7923180968fe5b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:55 -0400 Subject: drm/amdgpu: Fix SDMA load/unload sequence on HWS disabled mode Fix the SDMA load and unload sequence as suggested by HW document. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 47 ++++++++++++++++------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 47d1c132ac40..1e3e9be7d77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -379,29 +379,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; + unsigned long end_jiffies; uint32_t sdma_base_addr; + uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, + m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl); @@ -574,9 +595,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); return 0; } -- cgit v1.2.3 From d12fb13f23199faa7e536acec1db49068e5a067d Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 1 Nov 2017 19:21:56 -0400 Subject: drm/amdkfd: Fix SDMA ring buffer size calculation ffs function return the position of the first bit set on 1 based. (bit zero returns 1). Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 4859d263fa2a..4728fad3fd74 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -202,8 +202,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct cik_sdma_rlc_registers *m; m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << - SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; -- cgit v1.2.3 From 8c946b8988acec785bcf67088b6bd0747f36d2d3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:57 -0400 Subject: drm/amdkfd: Fix SDMA oversubsription handling SDMA only supports a fixed number of queues. HWS cannot handle oversubscription. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 2bec902fc939..a3f1e62c60ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -191,6 +191,24 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->queue_count >= + CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + pr_err("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + pr_debug("DQM returned %d for create_queue\n", retval); + print_queue(q); + break; + case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && -- cgit v1.2.3 From c393e9b2d51540b74e18e555df14706098dbf2cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 13 Nov 2017 18:08:48 +0200 Subject: drm/amdkfd: fix amdkfd use-after-free GP fault Fix GP fault caused by dev_info() reference to a struct device* after the device has been freed (use after free). kfd_chardev_exit() frees the device so 'kfd_device' should not be used after calling kfd_chardev_exit(). Signed-off-by: Randy Dunlap Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 6c5a9cab55de..f744caeaee04 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "kfd_priv.h" #define KFD_DRIVER_AUTHOR "AMD Inc. and others" @@ -132,7 +133,7 @@ static void __exit kfd_module_exit(void) kfd_process_destroy_wq(); kfd_topology_shutdown(); kfd_chardev_exit(); - dev_info(kfd_device, "Removed module\n"); + pr_info("amdkfd: Removed module\n"); } module_init(kfd_module_init); -- cgit v1.2.3 From b4d085201d86af69cbda2214c6dafc0be240ef9f Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 13 Nov 2017 03:35:27 +0300 Subject: uapi: fix linux/kfd_ioctl.h userspace compilation errors Consistently use types provided by via to fix the following linux/kfd_ioctl.h userspace compilation errors: /usr/include/linux/kfd_ioctl.h:236:2: error: unknown type name 'uint64_t' uint64_t va_addr; /* to KFD */ /usr/include/linux/kfd_ioctl.h:237:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:238:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/linux/kfd_ioctl.h:243:2: error: unknown type name 'uint64_t' uint64_t tile_config_ptr; /usr/include/linux/kfd_ioctl.h:245:2: error: unknown type name 'uint64_t' uint64_t macro_tile_config_ptr; /usr/include/linux/kfd_ioctl.h:249:2: error: unknown type name 'uint32_t' uint32_t num_tile_configs; /usr/include/linux/kfd_ioctl.h:253:2: error: unknown type name 'uint32_t' uint32_t num_macro_tile_configs; /usr/include/linux/kfd_ioctl.h:255:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:256:2: error: unknown type name 'uint32_t' uint32_t gb_addr_config; /* from KFD */ /usr/include/linux/kfd_ioctl.h:257:2: error: unknown type name 'uint32_t' uint32_t num_banks; /* from KFD */ /usr/include/linux/kfd_ioctl.h:258:2: error: unknown type name 'uint32_t' uint32_t num_ranks; /* from KFD */ Fixes: 6a1c9510694fe ("drm/amdkfd: Adding new IOCTL for scratch memory v2") Fixes: 5d71dbc3a5886 ("drm/amdkfd: Implement image tiling mode support v2") Signed-off-by: Dmitry V. Levin Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 731d0df722e3..6e80501368ae 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -233,29 +233,29 @@ struct kfd_ioctl_wait_events_args { }; struct kfd_ioctl_set_scratch_backing_va_args { - uint64_t va_addr; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u64 va_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_get_tile_config_args { /* to KFD: pointer to tile array */ - uint64_t tile_config_ptr; + __u64 tile_config_ptr; /* to KFD: pointer to macro tile array */ - uint64_t macro_tile_config_ptr; + __u64 macro_tile_config_ptr; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_tile_configs; + __u32 num_tile_configs; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_macro_tile_configs; + __u32 num_macro_tile_configs; - uint32_t gpu_id; /* to KFD */ - uint32_t gb_addr_config; /* from KFD */ - uint32_t num_banks; /* from KFD */ - uint32_t num_ranks; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 gb_addr_config; /* from KFD */ + __u32 num_banks; /* from KFD */ + __u32 num_ranks; /* from KFD */ /* struct size can be extended later if needed * without breaking ABI compatibility */ -- cgit v1.2.3 From b4b591c87f2b0f4ebaf3a68d4f13873b241aa584 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:21 +0200 Subject: nvme-rdma: don't suppress send completions The entire completions suppress mechanism is currently broken because the HCA might retry a send operation (due to dropped ack) after the nvme transaction has completed. In order to handle this, we signal all send completions and introduce a separate done handler for async events as they will be handled differently (as they don't include in-capsule data by definition). Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 54 +++++++++++++----------------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2c597105a6bf..61511bed8aca 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,7 +77,6 @@ enum nvme_rdma_queue_flags { struct nvme_rdma_queue { struct nvme_rdma_qe *rsp_ring; - atomic_t sig_count; int queue_size; size_t cmnd_capsule_len; struct nvme_rdma_ctrl *ctrl; @@ -510,7 +509,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, queue->cmnd_capsule_len = sizeof(struct nvme_command); queue->queue_size = queue_size; - atomic_set(&queue->sig_count, 0); queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, RDMA_PS_TCP, IB_QPT_RC); @@ -1204,21 +1202,9 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } -/* - * We want to signal completion at least every queue depth/2. This returns the - * largest power of two that is not above half of (queue size + 1) to optimize - * (avoid divisions). - */ -static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) -{ - int limit = 1 << ilog2((queue->queue_size + 1) / 2); - - return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0; -} - static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, - struct ib_send_wr *first, bool flush) + struct ib_send_wr *first) { struct ib_send_wr wr, *bad_wr; int ret; @@ -1227,31 +1213,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, sge->length = sizeof(struct nvme_command), sge->lkey = queue->device->pd->local_dma_lkey; - qe->cqe.done = nvme_rdma_send_done; - wr.next = NULL; wr.wr_cqe = &qe->cqe; wr.sg_list = sge; wr.num_sge = num_sge; wr.opcode = IB_WR_SEND; - wr.send_flags = 0; - - /* - * Unsignalled send completions are another giant desaster in the - * IB Verbs spec: If we don't regularly post signalled sends - * the send queue will fill up and only a QP reset will rescue us. - * Would have been way to obvious to handle this in hardware or - * at least the RDMA stack.. - * - * Always signal the flushes. The magic request used for the flush - * sequencer is not allocated in our driver's tagset and it's - * triggered to be freed by blk_cleanup_queue(). So we need to - * always mark it as signaled to ensure that the "wr_cqe", which is - * embedded in request's payload, is not freed when __ib_process_cq() - * calls wr_cqe->done(). - */ - if (nvme_rdma_queue_sig_limit(queue) || flush) - wr.send_flags |= IB_SEND_SIGNALED; + wr.send_flags = IB_SEND_SIGNALED; if (first) first->next = ≀ @@ -1301,6 +1268,12 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue) return queue->ctrl->tag_set.tags[queue_idx - 1]; } +static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + nvme_rdma_wr_error(cq, wc, "ASYNC"); +} + static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg); @@ -1319,10 +1292,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) cmd->common.flags |= NVME_CMD_SGL_METABUF; nvme_rdma_set_sg_null(cmd); + sqe->cqe.done = nvme_rdma_async_done; + ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE); - ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false); + ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL); WARN_ON_ONCE(ret); } @@ -1607,7 +1582,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_qe *sqe = &req->sqe; struct nvme_command *c = sqe->data; - bool flush = false; struct ib_device *dev; blk_status_t ret; int err; @@ -1636,13 +1610,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, goto err; } + sqe->cqe.done = nvme_rdma_send_done; + ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); - if (req_op(rq) == REQ_OP_FLUSH) - flush = true; err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); + req->mr->need_inval ? &req->reg_wr.wr : NULL); if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; -- cgit v1.2.3 From 4af7f7ff92a42b6c713293c99e7982bcfcf51a70 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:22 +0200 Subject: nvme-rdma: don't complete requests before a send work request has completed In order to guarantee that the HCA will never get an access violation (either from invalidated rkey or from iommu) when retrying a send operation we must complete a request only when both send completion and the nvme cqe has arrived. We need to set the send/recv completions flags atomically because we might have more than a single context accessing the request concurrently (one is cq irq-poll context and the other is user-polling used in IOCB_HIPRI). Only then we are safe to invalidate the rkey (if needed), unmap the host buffers, and complete the IO. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 61511bed8aca..502f7c515a99 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -59,6 +59,9 @@ struct nvme_rdma_request { struct nvme_request req; struct ib_mr *mr; struct nvme_rdma_qe sqe; + union nvme_result result; + __le16 status; + refcount_t ref; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; int nents; @@ -1162,6 +1165,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; req->mr->need_inval = false; + refcount_set(&req->ref, 2); /* send and recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1198,8 +1202,19 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { - if (unlikely(wc->status != IB_WC_SUCCESS)) + struct nvme_rdma_qe *qe = + container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); + struct nvme_rdma_request *req = + container_of(qe, struct nvme_rdma_request, sqe); + struct request *rq = blk_mq_rq_from_pdu(req); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "SEND"); + return; + } + + if (refcount_dec_and_test(&req->ref)) + nvme_end_request(rq, req->status, req->result); } static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, @@ -1318,14 +1333,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, } req = blk_mq_rq_to_pdu(rq); - if (rq->tag == tag) - ret = 1; + req->status = cqe->status; + req->result = cqe->result; if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - nvme_end_request(rq, cqe->status, cqe->result); + if (refcount_dec_and_test(&req->ref)) { + if (rq->tag == tag) + ret = 1; + nvme_end_request(rq, req->status, req->result); + } + return ret; } -- cgit v1.2.3 From 2f122e4f5107cf8ab0e592d63ed816a00110b4fe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:23 +0200 Subject: nvme-rdma: wait for local invalidation before completing a request We must not complete a request before the host memory region is invalidated. Luckily we have send with invalidate protocol support so we usually don't need to execute it, but in case the target did not invalidate a memory region for us, we must wait for the invalidation to complete before unmapping host memory and completing the I/O. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 502f7c515a99..c03460d0ca94 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1019,8 +1019,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc) static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) { - if (unlikely(wc->status != IB_WC_SUCCESS)) + struct nvme_rdma_request *req = + container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe); + struct request *rq = blk_mq_rq_from_pdu(req); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "LOCAL_INV"); + return; + } + + if (refcount_dec_and_test(&req->ref)) + nvme_end_request(rq, req->status, req->result); + } static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, @@ -1031,7 +1041,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, .opcode = IB_WR_LOCAL_INV, .next = NULL, .num_sge = 0, - .send_flags = 0, + .send_flags = IB_SEND_SIGNALED, .ex.invalidate_rkey = req->mr->rkey, }; @@ -1045,24 +1055,12 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; - int res; if (!blk_rq_bytes(rq)) return; - if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) { - res = nvme_rdma_inv_rkey(queue, req); - if (unlikely(res < 0)) { - dev_err(ctrl->ctrl.device, - "Queueing INV WR for rkey %#x failed (%d)\n", - req->mr->rkey, res); - nvme_rdma_error_recovery(queue->ctrl); - } - } - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -1337,8 +1335,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->result = cqe->result; if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && - wc->ex.invalidate_rkey == req->mr->rkey) + wc->ex.invalidate_rkey == req->mr->rkey) { req->mr->need_inval = false; + } else if (req->mr->need_inval) { + ret = nvme_rdma_inv_rkey(queue, req); + if (unlikely(ret < 0)) { + dev_err(queue->ctrl->ctrl.device, + "Queueing INV WR for rkey %#x failed (%d)\n", + req->mr->rkey, ret); + nvme_rdma_error_recovery(queue->ctrl); + } + /* the local invalidation completion will end the request */ + return 0; + } if (refcount_dec_and_test(&req->ref)) { if (rq->tag == tag) -- cgit v1.2.3 From 3ef0279bb0031f67537bd8972899a6a23d3064d7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:24 +0200 Subject: nvme-rdma: Check remotely invalidated rkey matches our expected rkey If we got a remote invalidation on a bogus rkey, this is a protocol error. Fail the connection in this case. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c03460d0ca94..3a952d458e7c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1334,8 +1334,13 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->status = cqe->status; req->result = cqe->result; - if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && - wc->ex.invalidate_rkey == req->mr->rkey) { + if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { + if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) { + dev_err(queue->ctrl->ctrl.device, + "Bogus remote invalidation for rkey %#x\n", + req->mr->rkey); + nvme_rdma_error_recovery(queue->ctrl); + } req->mr->need_inval = false; } else if (req->mr->need_inval) { ret = nvme_rdma_inv_rkey(queue, req); -- cgit v1.2.3 From f41725bbe16b0773302c0cc7dc2e89f54828712d Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Sun, 26 Nov 2017 10:40:55 +0000 Subject: nvme-rdma: Use mr pool Currently, blk_mq_tagset_iter() iterate over initial hctx tags only. If an I/O scheduler is used, it doesn't iterate the hctx scheduler tags and the static request aren't been updated. For example, while using NVMe over Fabrics RDMA host, this cause us not to reinit the scheduler requests and thus not re-register all the memory regions during the tagset re-initialization in the reconnect flow. This may lead to a memory registration error: "MEMREG for CQE 0xffff88044c14dce8 failed with status memory management operation error (6)" With this commit we don't need to reinit the requests, and thus fix this failure. Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 95 +++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 58 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3a952d458e7c..02ef0771f6b9 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -260,32 +261,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) return ret; } -static int nvme_rdma_reinit_request(void *data, struct request *rq) -{ - struct nvme_rdma_ctrl *ctrl = data; - struct nvme_rdma_device *dev = ctrl->device; - struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - int ret = 0; - - if (WARN_ON_ONCE(!req->mr)) - return 0; - - ib_dereg_mr(req->mr); - - req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, - ctrl->max_fr_pages); - if (IS_ERR(req->mr)) { - ret = PTR_ERR(req->mr); - req->mr = NULL; - goto out; - } - - req->mr->need_inval = false; - -out: - return ret; -} - static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx) { @@ -295,9 +270,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; struct nvme_rdma_device *dev = queue->device; - if (req->mr) - ib_dereg_mr(req->mr); - nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); } @@ -319,21 +291,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, if (ret) return ret; - req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, - ctrl->max_fr_pages); - if (IS_ERR(req->mr)) { - ret = PTR_ERR(req->mr); - goto out_free_qe; - } - req->queue = queue; return 0; - -out_free_qe: - nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - return -ENOMEM; } static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -433,6 +393,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); + rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -442,6 +404,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) nvme_rdma_dev_put(dev); } +static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) +{ + return min_t(u32, NVME_RDMA_MAX_SEGMENTS, + ibdev->attrs.max_fast_reg_page_list_len); +} + static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) { struct ib_device *ibdev; @@ -484,8 +452,22 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_qp; } + ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, + queue->queue_size, + IB_MR_TYPE_MEM_REG, + nvme_rdma_get_max_fr_pages(ibdev)); + if (ret) { + dev_err(queue->ctrl->ctrl.device, + "failed to initialize MR pool sized %d for QID %d\n", + queue->queue_size, idx); + goto out_destroy_ring; + } + return 0; +out_destroy_ring: + nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, + sizeof(struct nvme_completion), DMA_FROM_DEVICE); out_destroy_qp: rdma_destroy_qp(queue->cm_id); out_destroy_ib_cq: @@ -757,8 +739,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->device = ctrl->queues[0].device; - ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ctrl->device->dev->attrs.max_fast_reg_page_list_len); + ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); @@ -772,10 +753,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = PTR_ERR(ctrl->ctrl.admin_q); goto out_free_tagset; } - } else { - error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); - if (error) - goto out_free_queue; } error = nvme_rdma_start_queue(ctrl, 0); @@ -855,10 +832,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_tag_set; } } else { - ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); - if (ret) - goto out_free_io_queues; - blk_mq_update_nr_hw_queues(&ctrl->tag_set, ctrl->ctrl.queue_count - 1); } @@ -1061,6 +1034,11 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_bytes(rq)) return; + if (req->mr) { + ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + req->mr = NULL; + } + ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -1117,12 +1095,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; int nr; + req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs); + if (WARN_ON_ONCE(!req->mr)) + return -EAGAIN; + /* * Align the MR to a 4K page size to match the ctrl page size and * the block virtual boundary. */ nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); if (unlikely(nr < count)) { + ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + req->mr = NULL; if (nr < 0) return nr; return -EINVAL; @@ -1141,8 +1125,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - req->mr->need_inval = true; - sg->addr = cpu_to_le64(req->mr->iova); put_unaligned_le24(req->mr->length, sg->length); put_unaligned_le32(req->mr->rkey, sg->key); @@ -1162,7 +1144,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; - req->mr->need_inval = false; refcount_set(&req->ref, 2); /* send and recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1341,8 +1322,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->mr->rkey); nvme_rdma_error_recovery(queue->ctrl); } - req->mr->need_inval = false; - } else if (req->mr->need_inval) { + } else if (req->mr) { ret = nvme_rdma_inv_rkey(queue, req); if (unlikely(ret < 0)) { dev_err(queue->ctrl->ctrl.device, @@ -1650,7 +1630,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, sizeof(struct nvme_command), DMA_TO_DEVICE); err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->mr->need_inval ? &req->reg_wr.wr : NULL); + req->mr ? &req->reg_wr.wr : NULL); if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; @@ -1798,7 +1778,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, - .reinit_request = nvme_rdma_reinit_request, }; static inline bool -- cgit v1.2.3 From 2734166e89639c973c6e125ac8bcfc2d9db72b70 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 25 Nov 2017 13:14:40 -0600 Subject: net: openvswitch: datapath: fix data type in queue_gso_packets gso_type is being used in binary AND operations together with SKB_GSO_UDP. The issue is that variable gso_type is of type unsigned short and SKB_GSO_UDP expands to more than 16 bits: SKB_GSO_UDP = 1 << 16 this makes any binary AND operation between gso_type and SKB_GSO_UDP to be always zero, hence making some code unreachable and likely causing undesired behavior. Fix this by changing the data type of variable gso_type to unsigned int. Addresses-Coverity-ID: 1462223 Fixes: 0c19f846d582 ("net: accept UFO datagrams from tuntap and packet") Signed-off-by: Gustavo A. R. Silva Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 99cfafc2a139..ef38e5aecd28 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -308,7 +308,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { - unsigned short gso_type = skb_shinfo(skb)->gso_type; + unsigned int gso_type = skb_shinfo(skb)->gso_type; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; -- cgit v1.2.3 From 67c8d22a73128ff910e2287567132530abcf5b71 Mon Sep 17 00:00:00 2001 From: zhangliping Date: Sat, 25 Nov 2017 22:02:12 +0800 Subject: openvswitch: fix the incorrect flow action alloc size If we want to add a datapath flow, which has more than 500 vxlan outputs' action, we will get the following error reports: openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Actions may not be safe on all matching packets ... ... It seems that we can simply enlarge the MAX_ACTIONS_BUFSIZE to fix it, but this is not the root cause. For example, for a vxlan output action, we need about 60 bytes for the nlattr, but after it is converted to the flow action, it only occupies 24 bytes. This means that we can still support more than 1000 vxlan output actions for a single datapath flow under the the current 32k max limitation. So even if the nla_len(attr) is larger than MAX_ACTIONS_BUFSIZE, we shouldn't report EINVAL and keep it move on, as the judgement can be done by the reserve_sfa_size. Signed-off-by: zhangliping Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dc424798ba6f..624ea74353dd 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2241,14 +2241,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) +static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); - return ERR_PTR(-EINVAL); - } + WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) @@ -2321,12 +2318,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = ksize(*sfa) * 2; if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + OVS_NLERR(log, "Flow action size exceeds max %u", + MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); + } new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size, log); + acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -3059,7 +3059,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr), log); + *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); -- cgit v1.2.3 From ed81cc612c92167fe7606773e036ee5ccef6c190 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 21 Nov 2017 17:09:59 +0100 Subject: hwmon: Drop reference to Jean's tree This tree has not been used for over a year, Guenter is taking all the hwmon patches in practice. Signed-off-by: Jean Delvare Cc: Guenter Roeck Signed-off-by: Guenter Roeck --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..533fbb03a95c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6174,7 +6174,6 @@ M: Jean Delvare M: Guenter Roeck L: linux-hwmon@vger.kernel.org W: http://hwmon.wiki.kernel.org/ -T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git S: Maintained F: Documentation/hwmon/ -- cgit v1.2.3 From 517f56839f581618d24f2e67a35738a5c6cbaecb Mon Sep 17 00:00:00 2001 From: Mirza Krak Date: Wed, 15 Nov 2017 08:24:46 +0000 Subject: drm/rockchip: dw-mipi-dsi: fix possible un-balanced runtime PM enable In the case where the bind gets deferred we would end up with a un-balanced runtime PM enable call. Fix this by simply moving the pm_runtime_enable call to the end of the bind function when all paths have succeeded. Signed-off-by: Mirza Krak Signed-off-by: Sandy Huang Link: https://patchwork.freedesktop.org/patch/msgid/1510734286-37434-1-git-send-email-mirza.krak@endian.se --- drivers/gpu/drm/rockchip/dw-mipi-dsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c index 9a20b9dc27c8..f7fc652b0027 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c @@ -1275,8 +1275,6 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, goto err_pllref; } - pm_runtime_enable(dev); - dsi->dsi_host.ops = &dw_mipi_dsi_host_ops; dsi->dsi_host.dev = dev; ret = mipi_dsi_host_register(&dsi->dsi_host); @@ -1291,6 +1289,7 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, } dev_set_drvdata(dev, dsi); + pm_runtime_enable(dev); return 0; err_mipi_dsi_host: -- cgit v1.2.3 From c654b21ede93845863597de9ad774fd30db5f2ab Mon Sep 17 00:00:00 2001 From: Sebastian Sjoholm Date: Mon, 20 Nov 2017 19:29:32 +0100 Subject: USB: serial: option: add Quectel BG96 id Quectel BG96 is an Qualcomm MDM9206 based IoT modem, supporting both CAT-M and NB-IoT. Tested hardware is BG96 mounted on Quectel development board (EVB). The USB id is added to option.c to allow DIAG,GPS,AT and modem communication with the BG96. Signed-off-by: Sebastian Sjoholm Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index aaa7d901a06d..3b3513874cfd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -238,6 +238,7 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 #define QUECTEL_PRODUCT_EC25 0x0125 +#define QUECTEL_PRODUCT_BG96 0x0296 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1182,6 +1183,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), -- cgit v1.2.3 From fa56b3f83177e1ad60815f750dd3816bd4c25677 Mon Sep 17 00:00:00 2001 From: Vitor Massaru Iha Date: Mon, 30 Oct 2017 11:36:54 -0200 Subject: drm: Fix checkpatch issue: "WARNING: braces {} are not necessary for single statement blocks." Signed-off-by: Vitor Massaru Iha Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 72b22b805412..5a5427bbd70e 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -317,9 +317,8 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm) formats, ARRAY_SIZE(formats), NULL, DRM_PLANE_TYPE_PRIMARY, NULL); - if (ret) { + if (ret) return ERR_PTR(ret); - } drm_plane_helper_add(plane, &hdlcd_plane_helper_funcs); hdlcd->plane = plane; -- cgit v1.2.3 From f73e8b82531573a198a4d0e5bff0d3256cbbd1d8 Mon Sep 17 00:00:00 2001 From: Srishti Sharma Date: Fri, 29 Sep 2017 15:30:40 +0530 Subject: drm/arm: Replace instances of drm_dev_unref with drm_dev_put. Replace drm_dev_unref with drm_dev_put as it is more consistent with kernel coding style. Done using the following semantic patch by coccinelle. @r@ expression e; @@ -drm_dev_unref(); +drm_dev_put(); Signed-off-by: Srishti Sharma [split into hdlcd and malidp specific patches] Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 45b174fea36b..0afb53b1f4e9 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -355,7 +355,7 @@ err_unload: err_free: drm_mode_config_cleanup(drm); dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); return ret; } @@ -380,7 +380,7 @@ static void hdlcd_drm_unbind(struct device *dev) pm_runtime_disable(drm->dev); of_reserved_mem_device_release(drm->dev); drm_mode_config_cleanup(drm); - drm_dev_unref(drm); + drm_dev_put(drm); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); } -- cgit v1.2.3 From 7b6ddeaf27eca72795ceeae2f0f347db1b5f9a30 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Nov 2017 14:46:08 +0100 Subject: mac80211: use QoS NDP for AP probing When connected to a QoS/WMM AP, mac80211 should use a QoS NDP for probing it, instead of a regular non-QoS one, fix this. Change all the drivers to *not* allow QoS NDP for now, even though it looks like most of them should be OK with that. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/channel.c | 2 +- drivers/net/wireless/st/cw1200/sta.c | 4 ++-- drivers/net/wireless/ti/wl1251/main.c | 2 +- drivers/net/wireless/ti/wlcore/cmd.c | 5 +++-- include/net/mac80211.h | 8 +++++++- net/mac80211/mlme.c | 2 +- net/mac80211/tx.c | 29 +++++++++++++++++++++++++++-- 7 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index dfb26f03c1a2..1b05b5d7a038 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -1113,7 +1113,7 @@ ath_chanctx_send_vif_ps_frame(struct ath_softc *sc, struct ath_vif *avp, if (!avp->assoc) return false; - skb = ieee80211_nullfunc_get(sc->hw, vif); + skb = ieee80211_nullfunc_get(sc->hw, vif, false); if (!skb) return false; diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index 03687a80d6e9..38678e9a0562 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -198,7 +198,7 @@ void __cw1200_cqm_bssloss_sm(struct cw1200_common *priv, priv->bss_loss_state++; - skb = ieee80211_nullfunc_get(priv->hw, priv->vif); + skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); WARN_ON(!skb); if (skb) cw1200_tx(priv->hw, NULL, skb); @@ -2265,7 +2265,7 @@ static int cw1200_upload_null(struct cw1200_common *priv) .rate = 0xFF, }; - frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif); + frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); if (!frame.skb) return -ENOMEM; diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 9915d83a4a30..6d02c660b4ab 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -566,7 +566,7 @@ static int wl1251_build_null_data(struct wl1251 *wl) size = sizeof(struct wl12xx_null_data_template); ptr = NULL; } else { - skb = ieee80211_nullfunc_get(wl->hw, wl->vif); + skb = ieee80211_nullfunc_get(wl->hw, wl->vif, false); if (!skb) goto out; size = skb->len; diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index 2bfc12fdc929..761cf8573a80 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -1069,7 +1069,8 @@ int wl12xx_cmd_build_null_data(struct wl1271 *wl, struct wl12xx_vif *wlvif) ptr = NULL; } else { skb = ieee80211_nullfunc_get(wl->hw, - wl12xx_wlvif_to_vif(wlvif)); + wl12xx_wlvif_to_vif(wlvif), + false); if (!skb) goto out; size = skb->len; @@ -1096,7 +1097,7 @@ int wl12xx_cmd_build_klv_null_data(struct wl1271 *wl, struct sk_buff *skb = NULL; int ret = -ENOMEM; - skb = ieee80211_nullfunc_get(wl->hw, vif); + skb = ieee80211_nullfunc_get(wl->hw, vif, false); if (!skb) goto out; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cc9073e45be9..eec143cca1c0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4470,18 +4470,24 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, * ieee80211_nullfunc_get - retrieve a nullfunc template * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @qos_ok: QoS NDP is acceptable to the caller, this should be set + * if at all possible * * Creates a Nullfunc template which can, for example, uploaded to * hardware. The template must be updated after association so that correct * BSSID and address is used. * + * If @qos_ndp is set and the association is to an AP with QoS/WMM, the + * returned packet will be QoS NDP. + * * Note: Caller (or hardware) is responsible for setting the * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields. * * Return: The nullfunc template. %NULL on error. */ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + bool qos_ok); /** * ieee80211_probereq_get - retrieve a Probe Request template diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 04460440d731..c244691deab9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -895,7 +895,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); if (!skb) return; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7b8154474b9e..3160954fc406 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4438,13 +4438,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, EXPORT_SYMBOL(ieee80211_pspoll_get); struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + bool qos_ok) { struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_managed *ifmgd; struct ieee80211_local *local; struct sk_buff *skb; + bool qos = false; if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return NULL; @@ -4453,7 +4455,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, ifmgd = &sdata->u.mgd; local = sdata->local; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (qos_ok) { + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, ifmgd->bssid); + qos = sta && sta->sta.wme; + rcu_read_unlock(); + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*nullfunc) + 2); if (!skb) return NULL; @@ -4463,6 +4475,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_TODS); + if (qos) { + __le16 qos = cpu_to_le16(7); + + BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC | + IEEE80211_STYPE_NULLFUNC) != + IEEE80211_STYPE_QOS_NULLFUNC); + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC); + skb->priority = 7; + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + skb_put_data(skb, &qos, sizeof(qos)); + } + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); -- cgit v1.2.3 From fbbdad5edf0bb59786a51b94a9d006bc8c2da9a2 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 14 Nov 2017 23:20:05 +0800 Subject: mac80211: fix the update of path metric for RANN frame The previous path metric update from RANN frame has not considered the own link metric toward the transmitting mesh STA. Fix this. Reported-by: Michael65535 Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 4f7826d7b47c..4394463a0c2e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath; u8 ttl, flags, hopcount; const u8 *orig_addr; - u32 orig_sn, metric, metric_txsta, interval; + u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval; bool root_is_gate; ttl = rann->rann_ttl; @@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, interval = le32_to_cpu(rann->rann_interval); hopcount = rann->rann_hopcount; hopcount++; - metric = le32_to_cpu(rann->rann_metric); + orig_metric = le32_to_cpu(rann->rann_metric); /* Ignore our own RANNs */ if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, return; } - metric_txsta = airtime_link_metric_get(local, sta); + last_hop_metric = airtime_link_metric_get(local, sta); + new_metric = orig_metric + last_hop_metric; + if (new_metric < orig_metric) + new_metric = MAX_METRIC; mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { @@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if (!(SN_LT(mpath->sn, orig_sn)) && - !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { + !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) { rcu_read_unlock(); return; } @@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } mpath->sn = orig_sn; - mpath->rann_metric = metric + metric_txsta; + mpath->rann_metric = new_metric; mpath->is_root = true; /* Recording RANNs sender address to send individually * addressed PREQs destined for root mesh STA */ @@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, orig_sn, 0, NULL, 0, broadcast_addr, hopcount, ttl, interval, - metric + metric_txsta, 0, sdata); + new_metric, 0, sdata); } rcu_read_unlock(); -- cgit v1.2.3 From 72e2c3438ba3bd2ed640b6b5ea9e58993dd9ab7f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 29 Oct 2017 11:51:09 +0200 Subject: mac80211: tear down RX aggregations first When doing HW restart we tear down aggregations. Since at this point we are not TX'ing any aggregation, while the peer is still sending RX aggregation over the air, it will make sense to tear down the RX aggregations first. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 41f5e48f8021..167f83b853e6 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -292,7 +292,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - ___ieee80211_stop_tx_ba_session(sta, i, reason); ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_LEAVE_QBSS, reason != AGG_STOP_DESTROY_STA && @@ -300,6 +299,9 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, } mutex_unlock(&sta->ampdu_mlme.mtx); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + ___ieee80211_stop_tx_ba_session(sta, i, reason); + /* stopping might queue the work again - so cancel only afterwards */ cancel_work_sync(&sta->ampdu_mlme.work); -- cgit v1.2.3 From 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Nov 2017 11:22:42 +0000 Subject: ARM: avoid faulting on qemu When qemu starts a kernel in a bare environment, the default SCR has the AW and FW bits clear, which means that the kernel can't modify the PSR A or PSR F bits, and means that FIQs and imprecise aborts are always masked. When running uboot under qemu, the AW and FW SCR bits are set, and the kernel functions normally - and this is how real hardware behaves. Fix this for qemu by ignoring the FIQ bit. Fixes: 8bafae202c82 ("ARM: BUG if jumping to usermode address in kernel mode") Signed-off-by: Russell King --- arch/arm/kernel/entry-header.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 75f7a4e8541a..e056c9a9aa9d 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -299,7 +299,7 @@ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @@ -331,7 +331,7 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc -- cgit v1.2.3 From 30bf90ccdec1da9c8198b161ecbff39ce4e5a9ba Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sun, 26 Nov 2017 06:52:53 +0000 Subject: usb: gadget: ffs: Forbid usb_ep_alloc_request from sleeping Found using DEBUG_ATOMIC_SLEEP while submitting an AIO read operation: [ 100.853642] BUG: sleeping function called from invalid context at mm/slab.h:421 [ 100.861148] in_atomic(): 1, irqs_disabled(): 1, pid: 1880, name: python [ 100.867954] 2 locks held by python/1880: [ 100.867961] #0: (&epfile->mutex){....}, at: [] ffs_mutex_lock+0x27/0x30 [usb_f_fs] [ 100.868020] #1: (&(&ffs->eps_lock)->rlock){....}, at: [] ffs_epfile_io.isra.17+0x24b/0x590 [usb_f_fs] [ 100.868076] CPU: 1 PID: 1880 Comm: python Not tainted 4.14.0-edison+ #118 [ 100.868085] Hardware name: Intel Corporation Merrifield/BODEGA BAY, BIOS 542 2015.01.21:18.19.48 [ 100.868093] Call Trace: [ 100.868122] dump_stack+0x47/0x62 [ 100.868156] ___might_sleep+0xfd/0x110 [ 100.868182] __might_sleep+0x68/0x70 [ 100.868217] kmem_cache_alloc_trace+0x4b/0x200 [ 100.868248] ? dwc3_gadget_ep_alloc_request+0x24/0xe0 [dwc3] [ 100.868302] dwc3_gadget_ep_alloc_request+0x24/0xe0 [dwc3] [ 100.868343] usb_ep_alloc_request+0x16/0xc0 [udc_core] [ 100.868386] ffs_epfile_io.isra.17+0x444/0x590 [usb_f_fs] [ 100.868424] ? _raw_spin_unlock_irqrestore+0x27/0x40 [ 100.868457] ? kiocb_set_cancel_fn+0x57/0x60 [ 100.868477] ? ffs_ep0_poll+0xc0/0xc0 [usb_f_fs] [ 100.868512] ffs_epfile_read_iter+0xfe/0x157 [usb_f_fs] [ 100.868551] ? security_file_permission+0x9c/0xd0 [ 100.868587] ? rw_verify_area+0xac/0x120 [ 100.868633] aio_read+0x9d/0x100 [ 100.868692] ? __fget+0xa2/0xd0 [ 100.868727] ? __might_sleep+0x68/0x70 [ 100.868763] SyS_io_submit+0x471/0x680 [ 100.868878] do_int80_syscall_32+0x4e/0xd0 [ 100.868921] entry_INT80_32+0x2a/0x2a [ 100.868932] EIP: 0xb7fbb676 [ 100.868941] EFLAGS: 00000292 CPU: 1 [ 100.868951] EAX: ffffffda EBX: b7aa2000 ECX: 00000002 EDX: b7af8368 [ 100.868961] ESI: b7fbb660 EDI: b7aab000 EBP: bfb6c658 ESP: bfb6c638 [ 100.868973] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 97ea059a7aa4..9aa457b53e01 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1012,7 +1012,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) else ret = ep->status; goto error_mutex; - } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_KERNEL))) { + } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) { ret = -ENOMEM; } else { req->buf = data; -- cgit v1.2.3 From 593e789fd4421635bde09398b8a8fb510b18414a Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Wed, 22 Nov 2017 16:28:00 +0100 Subject: usb: bdc: fix platform_no_drv_owner.cocci warnings Remove .owner field if calls are used which set it automatically drivers/usb/gadget/udc/bdc/bdc_core.c:645:3-8: No need to set .owner here. The core will do it. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/bdc/bdc_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/bdc/bdc_core.c b/drivers/usb/gadget/udc/bdc/bdc_core.c index d39f070acbd7..01b44e159623 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_core.c +++ b/drivers/usb/gadget/udc/bdc/bdc_core.c @@ -642,7 +642,6 @@ static const struct of_device_id bdc_of_match[] = { static struct platform_driver bdc_driver = { .driver = { .name = BRCM_BDC_NAME, - .owner = THIS_MODULE, .pm = &bdc_pm_ops, .of_match_table = bdc_of_match, }, -- cgit v1.2.3 From b77000ed558daa3bef0899d29bf171b8c9b5e6a8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Nov 2017 16:20:52 -0500 Subject: btrfs: fix deadlock when writing out space cache If we fail to prepare our pages for whatever reason (out of memory in our case) we need to make sure to drop the block_group->data_rwsem, otherwise hilarity ensues. Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: David Sterba [ add label and use existing unlocking code ] Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cdc9f4015ec3..4426d1c73e50 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1264,7 +1264,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Lock all pages first so we can lock the extent safely. */ ret = io_ctl_prepare_pages(io_ctl, inode, 0); if (ret) - goto out; + goto out_unlock; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state); @@ -1358,6 +1358,7 @@ out_nospc_locked: out_nospc: cleanup_write_cache_enospc(inode, io_ctl, &cached_state); +out_unlock: if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) up_write(&block_group->data_rwsem); -- cgit v1.2.3 From 3aa623da789ff6edf789b5655debf33f50b3a9b2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:32 +0800 Subject: sctp: use sizeof(__u16) for each stream number length instead of magic number Now in stream reconf part there are still some places using magic number 2 for each stream number length. To make it more readable, this patch is to replace them with sizeof(__u16). Reported-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index a11db21dc8a0..09c797a10aaa 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -563,7 +563,7 @@ struct sctp_chunk *sctp_process_strreset_outreq( flags = SCTP_STREAM_RESET_INCOMING_SSN; } - nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; + nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); if (nums) { str_p = outreq->list_of_streams; for (i = 0; i < nums; i++) { @@ -627,7 +627,7 @@ struct sctp_chunk *sctp_process_strreset_inreq( goto out; } - nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2; + nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16); str_p = inreq->list_of_streams; for (i = 0; i < nums; i++) { if (ntohs(str_p[i]) >= stream->outcnt) { @@ -927,7 +927,8 @@ struct sctp_chunk *sctp_process_strreset_resp( outreq = (struct sctp_strreset_outreq *)req; str_p = outreq->list_of_streams; - nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; + nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / + sizeof(__u16); if (result == SCTP_STRRESET_PERFORMED) { if (nums) { @@ -956,7 +957,8 @@ struct sctp_chunk *sctp_process_strreset_resp( inreq = (struct sctp_strreset_inreq *)req; str_p = inreq->list_of_streams; - nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; + nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / + sizeof(__u16); *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); -- cgit v1.2.3 From d570a59c5b5f8fb3b65fa7b15ddb205a1d55f8d0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:33 +0800 Subject: sctp: only allow the out stream reset when the stream outq is empty Now the out stream reset in sctp stream reconf could be done even if the stream outq is not empty. It means that users can not be sure since which msg the new ssn will be used. To make this more synchronous, it shouldn't allow to do out stream reset until these chunks in unsent outq all are sent out. This patch checks the corresponding stream outqs when sending and processing the request . If any of them has unsent chunks in outq, it will return -EAGAIN instead or send SCTP_STRRESET_IN_PROGRESS back to the sender. Fixes: 7f9d68ac944e ("sctp: implement sender-side procedures for SSN Reset Request Parameter") Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 09c797a10aaa..b20903712d67 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -254,6 +254,30 @@ static int sctp_send_reconf(struct sctp_association *asoc, return retval; } +static bool sctp_stream_outq_is_empty(struct sctp_stream *stream, + __u16 str_nums, __be16 *str_list) +{ + struct sctp_association *asoc; + __u16 i; + + asoc = container_of(stream, struct sctp_association, stream); + if (!asoc->outqueue.out_qlen) + return true; + + if (!str_nums) + return false; + + for (i = 0; i < str_nums; i++) { + __u16 sid = ntohs(str_list[i]); + + if (stream->out[sid].ext && + !list_empty(&stream->out[sid].ext->outq)) + return false; + } + + return true; +} + int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params) { @@ -317,6 +341,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc, for (i = 0; i < str_nums; i++) nstr_list[i] = htons(str_list[i]); + if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) { + retval = -EAGAIN; + goto out; + } + chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); kfree(nstr_list); @@ -636,6 +665,12 @@ struct sctp_chunk *sctp_process_strreset_inreq( } } + if (!sctp_stream_outq_is_empty(stream, nums, str_p)) { + result = SCTP_STRRESET_IN_PROGRESS; + asoc->strreset_inseq--; + goto err; + } + chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); if (!chunk) goto out; -- cgit v1.2.3 From 5c6144a0eb5366ae07fc5059301b139338f39bbd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:34 +0800 Subject: sctp: only allow the asoc reset when the asoc outq is empty As it says in rfc6525#section5.1.4, before sending the request, C2: The sender has either no outstanding TSNs or considers all outstanding TSNs abandoned. Prior to this patch, it tried to consider all outstanding TSNs abandoned by dropping all chunks in all outqs with sctp_outq_free (even including sacked, retransmit and transmitted queues) when doing this reset, which is too aggressive. To make it work gently, this patch will only allow the asoc reset when the sender has no outstanding TSNs by checking if unsent, transmitted and retransmit are all empty with sctp_outq_is_empty before sending and processing the request. Fixes: 692787cef651 ("sctp: implement receiver-side procedures for the SSN/TSN Reset Request Parameter") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index b20903712d67..f3b7d2779c18 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -406,6 +406,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) if (asoc->strreset_outstanding) return -EINPROGRESS; + if (!sctp_outq_is_empty(&asoc->outqueue)) + return -EAGAIN; + chunk = sctp_make_strreset_tsnreq(asoc); if (!chunk) return -ENOMEM; @@ -728,6 +731,12 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( } goto err; } + + if (!sctp_outq_is_empty(&asoc->outqueue)) { + result = SCTP_STRRESET_IN_PROGRESS; + goto err; + } + asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) -- cgit v1.2.3 From 159f2a7456c6ae95c1e1a58e8b8ec65ef12d51cf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:35 +0800 Subject: sctp: avoid flushing unsent queue when doing asoc reset Now when doing asoc reset, it cleans up sacked and abandoned queues by calling sctp_outq_free where it also cleans up unsent, retransmit and transmitted queues. It's safe for the sender of response, as these 3 queues are empty at that time. But when the receiver of response is doing the reset, the users may already enqueue some chunks into unsent during the time waiting the response, and these chunks should not be flushed. To void the chunks in it would be removed, it moves the queue into a temp list, then gets it back after sctp_outq_free is done. The patch also fixes some incorrect comments in sctp_process_strreset_tsnreq. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index f3b7d2779c18..9dd5bfe3860c 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -747,9 +747,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( goto out; } - /* G3: The same processing as though a SACK chunk with no gap report - * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were - * received MUST be performed. + /* G4: The same processing as though a FWD-TSN chunk (as defined in + * [RFC3758]) with all streams affected and a new cumulative TSN + * ACK of the Receiver's Next TSN minus 1 were received MUST be + * performed. */ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); @@ -764,10 +765,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); - /* G4: The same processing as though a FWD-TSN chunk (as defined in - * [RFC3758]) with all streams affected and a new cumulative TSN - * ACK of the Receiver's Next TSN minus 1 were received MUST be - * performed. + /* G3: The same processing as though a SACK chunk with no gap report + * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were + * received MUST be performed. */ sctp_outq_free(&asoc->outqueue); @@ -1021,6 +1021,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (result == SCTP_STRRESET_PERFORMED) { __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( &asoc->peer.tsn_map); + LIST_HEAD(temp); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); @@ -1029,7 +1030,13 @@ struct sctp_chunk *sctp_process_strreset_resp( SCTP_TSN_MAP_INITIAL, stsn, GFP_ATOMIC); + /* Clean up sacked and abandoned queues only. As the + * out_chunk_list may not be empty, splice it to temp, + * then get it back after sctp_outq_free is done. + */ + list_splice_init(&asoc->outqueue.out_chunk_list, &temp); sctp_outq_free(&asoc->outqueue); + list_splice_init(&temp, &asoc->outqueue.out_chunk_list); asoc->next_tsn = rtsn; asoc->ctsn_ack_point = asoc->next_tsn - 1; -- cgit v1.2.3 From 52a395896a051a3d5c34fba67c324f69ec5e67c6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:36 +0800 Subject: sctp: set sender next_tsn for the old result with ctsn_ack_point plus 1 When doing asoc reset, if the sender of the response has already sent some chunk and increased asoc->next_tsn before the duplicate request comes, the response will use the old result with an incorrect sender next_tsn. Better than asoc->next_tsn, asoc->ctsn_ack_point can't be changed after the sender of the response has performed the asoc reset and before the peer has confirmed it, and it's value is still asoc->next_tsn original value minus 1. This patch sets sender next_tsn for the old result with ctsn_ack_point plus 1 when processing the duplicate request, to make sure the sender next_tsn value peer gets will be always right. Fixes: 692787cef651 ("sctp: implement receiver-side procedures for the SSN/TSN Reset Request Parameter") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 9dd5bfe3860c..a20145b3a949 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -725,7 +725,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; if (result == SCTP_STRRESET_PERFORMED) { - next_tsn = asoc->next_tsn; + next_tsn = asoc->ctsn_ack_point + 1; init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; } -- cgit v1.2.3 From b7e5a591502b03b4a1408bb93a15968d6ea446ce Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 20 Nov 2017 10:33:09 -0800 Subject: RISC-V: Remove __vdso_cmpxchg{32,64} symbol versions These were left over from an earlier version of the port. Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vdso.lds.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8c9dce95c11d..3ac08eebd11d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,8 +70,6 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; - __vdso_cmpxchg32; - __vdso_cmpxchg64; local: *; }; } -- cgit v1.2.3 From 28dfbe6ed483e8a589cce88095d7787d61bf9c16 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 Oct 2017 15:42:14 -0700 Subject: RISC-V: Add VDSO entries for clock_get/gettimeofday/getcpu For now these are just placeholders that execute the syscall. We will later optimize them to avoid kernel crossings, but we'd like to have the VDSO entries from the first released kernel version to make the ABI simpler. Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 6 +++++- arch/riscv/kernel/vdso/clock_getres.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/clock_gettime.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/getcpu.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/gettimeofday.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/vdso.lds.S | 4 ++++ 6 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/kernel/vdso/clock_getres.S create mode 100644 arch/riscv/kernel/vdso/clock_gettime.S create mode 100644 arch/riscv/kernel/vdso/getcpu.S create mode 100644 arch/riscv/kernel/vdso/gettimeofday.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 523d0a8ac8db..2dcc4f3070bc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,7 +1,11 @@ # Copied from arch/tile/kernel/vdso/Makefile # Symbols present in the vdso -vdso-syms = rt_sigreturn +vdso-syms = rt_sigreturn +vdso-syms += gettimeofday +vdso-syms += clock_gettime +vdso-syms += clock_getres +vdso-syms += getcpu # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S new file mode 100644 index 000000000000..edf7e2339648 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_getres.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__vdso_clock_getres) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_getres + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_getres) diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S new file mode 100644 index 000000000000..aac65676c6d5 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_gettime.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__vdso_clock_gettime) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_gettime + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_gettime) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S new file mode 100644 index 000000000000..cc7e98924484 --- /dev/null +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ +ENTRY(__vdso_getcpu) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_getcpu + ecall + ret + .cfi_endproc +ENDPROC(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S new file mode 100644 index 000000000000..da85d33e8990 --- /dev/null +++ b/arch/riscv/kernel/vdso/gettimeofday.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__vdso_gettimeofday) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_gettimeofday + ecall + ret + .cfi_endproc +ENDPROC(__vdso_gettimeofday) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 3ac08eebd11d..c7543c6a00f9 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,6 +70,10 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; + __vdso_getcpu; local: *; }; } -- cgit v1.2.3 From eae8d82529dd9820e206ecba0047b806c4410e65 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 6 Nov 2017 10:43:18 +0800 Subject: btrfs: Fix wild memory access in compression level parser [BUG] Kernel panic when mounting with "-o compress" mount option. KASAN will report like: ------ ================================================================== BUG: KASAN: wild-memory-access in strncmp+0x31/0xc0 Read of size 1 at addr d86735fce994f800 by task mount/662 ... Call Trace: dump_stack+0xe3/0x175 kasan_report+0x163/0x370 __asan_load1+0x47/0x50 strncmp+0x31/0xc0 btrfs_compress_str2level+0x20/0x70 [btrfs] btrfs_parse_options+0xff4/0x1870 [btrfs] open_ctree+0x2679/0x49f0 [btrfs] btrfs_mount+0x1b7f/0x1d30 [btrfs] mount_fs+0x49/0x190 vfs_kern_mount.part.29+0xba/0x280 vfs_kern_mount+0x13/0x20 btrfs_mount+0x31e/0x1d30 [btrfs] mount_fs+0x49/0x190 vfs_kern_mount.part.29+0xba/0x280 do_mount+0xaad/0x1a00 SyS_mount+0x98/0xe0 entry_SYSCALL_64_fastpath+0x1f/0xbe ------ [Cause] For 'compress' and 'compress_force' options, its token doesn't expect any parameter so its args[0] contains uninitialized data. Accessing args[0] will cause above wild memory access. [Fix] For Opt_compress and Opt_compress_force, set compression level to the default. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba [ set the default in advance ] Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- fs/btrfs/compression.h | 2 ++ fs/btrfs/super.c | 13 +++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 4a78e5726337..5982c8a71f02 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1529,5 +1529,5 @@ unsigned int btrfs_compress_str2level(const char *str) if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0) return str[5] - '0'; - return 0; + return BTRFS_ZLIB_DEFAULT_LEVEL; } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 93c5b82ae97e..0868cc554f14 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -34,6 +34,8 @@ /* Maximum size of data before compression */ #define BTRFS_MAX_UNCOMPRESSED (SZ_128K) +#define BTRFS_ZLIB_DEFAULT_LEVEL 3 + struct compressed_bio { /* number of bios pending for this compressed extent */ refcount_t pending_bios; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 65af029559b5..ff3545e526f5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -507,9 +507,18 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { compress_type = "zlib"; + info->compress_type = BTRFS_COMPRESS_ZLIB; - info->compress_level = - btrfs_compress_str2level(args[0].from); + info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; + /* + * args[0] contains uninitialized data since + * for these tokens we don't expect any + * parameter. + */ + if (token != Opt_compress && + token != Opt_compress_force) + info->compress_level = + btrfs_compress_str2level(args[0].from); btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); -- cgit v1.2.3 From 9f97df50c52c2887432debb6238f4e43567386a5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 11:20:15 -0400 Subject: reiserfs: remove unneeded i_version bump The i_version field in reiserfs is not initialized and is only ever updated here. Nothing ever views it, so just remove it. Signed-off-by: Jeff Layton Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5464ec517702..4885c7b6e44f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2591,7 +2591,6 @@ out: return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; -- cgit v1.2.3 From e872fa94662d0644057c7c80b3071bdb9249e5ab Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:49 +0000 Subject: KVM: lapic: Split out x2apic ldr calculation Split out the ldr calculation from kvm_apic_set_x2apic_id since we're about to reuse it in the following patch. Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 943acbf00c69..e2edb1103002 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } +static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) +{ + return ((id >> 4) << 16) | (1 << (id & 0xf)); +} + static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + u32 ldr = kvm_apic_calc_x2apic_ldr(id); WARN_ON_ONCE(id != apic->vcpu->vcpu_id); -- cgit v1.2.3 From 12806ba937382fdfdbad62a399aa2dce65c10fcd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:50 +0000 Subject: KVM: lapic: Fixup LDR on load in x2apic In x2apic mode the LDR is fixed based on the ID rather than separately loadable like it was before x2. When kvm_apic_set_state is called, the base is set, and if it has the X2APIC_ENABLE flag set then the LDR is calculated; however that value gets overwritten by the memcpy a few lines below overwriting it with the value that came from userland. The symptom is a lack of EOI after loading the state (e.g. after a QEMU migration) and is due to the EOI bitmap being wrong due to the incorrect LDR. This was seen with a Win2016 guest under Qemu with irqchip=split whose USB mouse didn't work after a VM migration. This corresponds to RH bug: https://bugzilla.redhat.com/show_bug.cgi?id=1502591 Reported-by: Yiqian Wei Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org [Applied fixup from Liran Alon. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e2edb1103002..e2c1fb8d35ce 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2250,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, { if (apic_x2apic_mode(vcpu->arch.apic)) { u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); if (vcpu->kvm->arch.x2apic_format) { if (*id != vcpu->vcpu_id) @@ -2260,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, else *id <<= 24; } + + /* In x2APIC mode, the LDR is fixed and based on the id */ + if (set) + *ldr = kvm_apic_calc_x2apic_ldr(*id); } return 0; -- cgit v1.2.3 From e70b57a6ce4e8b92a56a615ae79bdb2bd66035e7 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:55:05 -0800 Subject: KVM: X86: Fix softlockup when get the current kvmclock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [qemu-system-x86:10185] CPU: 6 PID: 10185 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc4+ #4 RIP: 0010:kvm_get_time_scale+0x4e/0xa0 [kvm] Call Trace: get_time_ref_counter+0x5a/0x80 [kvm] kvm_hv_process_stimers+0x120/0x5f0 [kvm] kvm_arch_vcpu_ioctl_run+0x4b4/0x1690 [kvm] kvm_vcpu_ioctl+0x33a/0x620 [kvm] do_vfs_ioctl+0xa1/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1e/0xa9 This can be reproduced when running kvm-unit-tests/hyperv_stimer.flat and cpu-hotplug stress simultaneously. __this_cpu_read(cpu_tsc_khz) returns 0 (set in kvmclock_cpu_down_prep()) when the pCPU is unhotplug which results in kvm_get_time_scale() gets into an infinite loop. This patch fixes it by treating the unhotplug pCPU as not using master clock. Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4552427105f6..f49fe514d1b2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1798,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); -- cgit v1.2.3 From c37c28730bb031cc8a44a130c2555c0f3efbe2d0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:52:21 -0800 Subject: KVM: VMX: Fix rflags cache during vCPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: *** Guest State *** CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 CR3 = 0x000000002081e000 RSP = 0x000000000000fffa RIP = 0x0000000000000000 RFLAGS=0x00023000 DR7 = 0x00000000000000 ^^^^^^^^^^ ------------[ cut here ]------------ WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 Call Trace: kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The failed vmentry is triggered by the following beautified testcase: #include #include #include #include #include #include #include long r[5]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); struct kvm_guest_debug debug = { .control = 0xf0403, .arch = { .debugreg[6] = 0x2, .debugreg[7] = 0x2 } }; ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); ioctl(r[4], KVM_RUN, 0); } which testcase tries to setup the processor specific debug registers and configure vCPU for handling guest debug events through KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set rflags in order to set TF bit if single step is needed. All regs' caches are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU reset. However, the cache of rflags is not reset during vCPU reset. The function vmx_get_rflags() returns an unreset rflags cache value since the cache is marked avail, it is 0 after boot. Vmentry fails if the rflags reserved bit 1 is 0. This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and its cache to 0x2 during vCPU reset. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0d59dbe430c8..04f19b099617 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5602,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); -- cgit v1.2.3 From b74558259c5149e5edd79348b70eb34177cbeea0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 22 Nov 2017 14:04:00 -0800 Subject: KVM: VMX: Fix vmx->nested freeing when no SMI handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 2939 at arch/x86/kvm/vmx.c:3844 free_loaded_vmcs+0x77/0x80 [kvm_intel] CPU: 5 PID: 2939 Comm: repro Not tainted 4.14.0+ #26 RIP: 0010:free_loaded_vmcs+0x77/0x80 [kvm_intel] Call Trace: vmx_free_vcpu+0xda/0x130 [kvm_intel] kvm_arch_destroy_vm+0x192/0x290 [kvm] kvm_put_kvm+0x262/0x560 [kvm] kvm_vm_release+0x2c/0x30 [kvm] __fput+0x190/0x370 task_work_run+0xa1/0xd0 do_exit+0x4d2/0x13e0 do_group_exit+0x89/0x140 get_signal+0x318/0xb80 do_signal+0x8c/0xb40 exit_to_usermode_loop+0xe4/0x140 syscall_return_slowpath+0x206/0x230 entry_SYSCALL_64_fastpath+0x98/0x9a The syzkaller testcase will execute VMXON/VMLAUCH instructions, so the vmx->nested stuff is populated, it will also issue KVM_SMI ioctl. However, the testcase is just a simple c program and not be lauched by something like seabios which implements smi_handler. Commit 05cade71cf (KVM: nSVM: fix SMI injection in guest mode) gets out of guest mode and set nested.vmxon to false for the duration of SMM according to SDM 34.14.1 "leave VMX operation" upon entering SMM. We can't alloc/free the vmx->nested stuff each time when entering/exiting SMM since it will induce more overhead. So the function vmx_pre_enter_smm() marks nested.vmxon false even if vmx->nested stuff is still populated. What it expected is em_rsm() can mark nested.vmxon to be true again. However, the smi_handler/rsm will not execute since there is no something like seabios in this scenario. The function free_nested() fails to free the vmx->nested stuff since the vmx->nested.vmxon is false which results in the above warning. This patch fixes it by also considering the no SMI handler case, luckily vmx->nested.smm.vmxon is marked according to the value of vmx->nested.vmxon in vmx_pre_enter_smm(), we can take advantage of it and free vmx->nested stuff when L1 goes down. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Reviewed-by: Liran Alon Fixes: 05cade71cf (KVM: nSVM: fix SMI injection in guest mode) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 04f19b099617..4704aaf6d19e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7415,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) */ static void free_nested(struct vcpu_vmx *vmx) { - if (!vmx->nested.vmxon) + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; vmx->nested.vmxon = false; + vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; -- cgit v1.2.3 From ebb70442cdd4872260c2415929c456be3562da82 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 21 Nov 2017 14:35:40 -0700 Subject: Btrfs: fix list_add corruption and soft lockups in fsync Xfstests btrfs/146 revealed this corruption, [ 58.138831] Buffer I/O error on dev dm-0, logical block 2621424, async page read [ 58.151233] BTRFS error (device sdf): bdev /dev/mapper/error-test errs: wr 1, rd 0, flush 0, corrupt 0, gen 0 [ 58.152403] list_add corruption. prev->next should be next (ffff88005e6775d8), but was ffffc9000189be88. (prev=ffffc9000189be88). [ 58.153518] ------------[ cut here ]------------ [ 58.153892] WARNING: CPU: 1 PID: 1287 at lib/list_debug.c:31 __list_add_valid+0x169/0x1f0 ... [ 58.157379] RIP: 0010:__list_add_valid+0x169/0x1f0 ... [ 58.161956] Call Trace: [ 58.162264] btrfs_log_inode_parent+0x5bd/0xfb0 [btrfs] [ 58.163583] btrfs_log_dentry_safe+0x60/0x80 [btrfs] [ 58.164003] btrfs_sync_file+0x4c2/0x6f0 [btrfs] [ 58.164393] vfs_fsync_range+0x5f/0xd0 [ 58.164898] do_fsync+0x5a/0x90 [ 58.165170] SyS_fsync+0x10/0x20 [ 58.165395] entry_SYSCALL_64_fastpath+0x1f/0xbe ... It turns out that we could record btrfs_log_ctx:io_err in log_one_extents when IO fails, but make log_one_extents() return '0' instead of -EIO, so the IO error is not acknowledged by the callers, i.e. btrfs_log_inode_parent(), which would remove btrfs_log_ctx:list from list head 'root->log_ctxs'. Since btrfs_log_ctx is allocated from stack memory, it'd get freed with a object alive on the list. then a future list_add will throw the above warning. This returns the correct error in the above case. Jeff also reported this while testing against his fsync error patch set[1]. [1]: https://www.spinics.net/lists/linux-btrfs/msg65308.html "btrfs list corruption and soft lockups while testing writeback error handling" Fixes: 8407f553268a4611f254 ("Btrfs: fix data corruption after fast fsync and writeback error") Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 5 +++-- fs/btrfs/tree-log.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2de7b4f4ca3c..eb1bac7c8553 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2057,6 +2057,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); + btrfs_init_log_ctx(&ctx, inode); + /* * We write the dirty pages in the range and wait until they complete * out of the ->i_mutex. If so, we can flush the dirty pages by @@ -2203,8 +2205,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } trans->sync = true; - btrfs_init_log_ctx(&ctx, inode); - ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ @@ -2262,6 +2262,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = btrfs_end_transaction(trans); } out: + ASSERT(list_empty(&ctx.list)); err = file_check_and_advance_wb_err(file); if (!ret) ret = err; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index aa7c71cff575..7bf9b31561db 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4102,7 +4102,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, if (ordered_io_err) { ctx->io_err = -EIO; - return 0; + return ctx->io_err; } btrfs_init_map_token(&token); -- cgit v1.2.3 From 20b7035c66bacc909ae3ffe92c1a1ea7db99fe4f Mon Sep 17 00:00:00 2001 From: "Jan H. Schönherr" Date: Fri, 24 Nov 2017 22:39:01 +0100 Subject: KVM: Let KVM_SET_SIGNAL_MASK work as advertised MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM API says for the signal mask you set via KVM_SET_SIGNAL_MASK, that "any unblocked signal received [...] will cause KVM_RUN to return with -EINTR" and that "the signal will only be delivered if not blocked by the original signal mask". This, however, is only true, when the calling task has a signal handler registered for a signal. If not, signal evaluation is short-circuited for SIG_IGN and SIG_DFL, and the signal is either ignored without KVM_RUN returning or the whole process is terminated. Make KVM_SET_SIGNAL_MASK behave as advertised by utilizing logic similar to that in do_sigtimedwait() to avoid short-circuiting of signals. Signed-off-by: Jan H. Schönherr Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 7 ++----- arch/powerpc/kvm/powerpc.c | 7 ++----- arch/s390/kvm/kvm-s390.c | 7 ++----- arch/x86/kvm/x86.c | 7 ++----- include/linux/kvm_host.h | 3 +++ virt/kvm/arm/arm.c | 8 +++----- virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++ 7 files changed, 37 insertions(+), 25 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d535edc01434..75fdeaa8c62f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r = -EINTR; - sigset_t sigsaved; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) @@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_enable(); out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6b6c53c42ac9..1915e86cef6f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1407,7 +1407,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; - sigset_t sigsaved; if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; @@ -1448,16 +1447,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (run->immediate_exit) r = -EINTR; else r = kvmppc_vcpu_run(run, vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98ad8b9e0360..9614aea5839b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3372,7 +3372,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int rc; - sigset_t sigsaved; if (kvm_run->immediate_exit) return -EINTR; @@ -3382,8 +3381,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { kvm_s390_vcpu_start(vcpu); @@ -3417,8 +3415,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); vcpu->stat.exit_userspace++; return rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f49fe514d1b2..eee8e7faf1af 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7267,12 +7267,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct fpu *fpu = ¤t->thread.fpu; int r; - sigset_t sigsaved; fpu__initialize(fpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { @@ -7315,8 +7313,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: post_kvm_run_save(vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2e754b7c282c..893d6d606cd0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,9 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_sigset_activate(struct kvm_vcpu *vcpu); +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); + void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a6524ff27de4..a67c106d73f5 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -615,7 +615,6 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; - sigset_t sigsaved; if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; @@ -633,8 +632,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (run->immediate_exit) return -EINTR; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); ret = 1; run->exit_reason = KVM_EXIT_UNKNOWN; @@ -769,8 +767,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_update_run(vcpu); } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); + return ret; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2dd1a9ca4599..c01cff064ec5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2065,6 +2065,29 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +void kvm_sigset_activate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + /* + * This does a lockless modification of ->real_blocked, which is fine + * because, only current can change ->real_blocked and all readers of + * ->real_blocked don't care as long ->real_blocked is always a subset + * of ->blocked. + */ + sigprocmask(SIG_SETMASK, &vcpu->sigset, ¤t->real_blocked); +} + +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + sigprocmask(SIG_SETMASK, ¤t->real_blocked, NULL); + sigemptyset(¤t->real_blocked); +} + static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { unsigned int old, val, grow; -- cgit v1.2.3 From 98c4f78dcdd8cec112d1cbc5e9a792ee6e5ab7a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Nov 2017 12:21:07 -0800 Subject: xfs: always free inline data before resetting inode fork during ifree In xfs_ifree, we reset the data/attr forks to extents format without bothering to free any inline data buffer that might still be around after all the blocks have been truncated off the file. Prior to commit 43518812d2 ("xfs: remove support for inlining data/extents into the inode fork") nobody noticed because the leftover inline data after truncation was small enough to fit inside the inline buffer inside the fork itself. However, now that we've removed the inline buffer, we /always/ have to free the inline data buffer or else we leak them like crazy. This test was found by turning on kmemleak for generic/001 or generic/388. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 61d1cb7dc10d..801274126648 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2400,6 +2400,24 @@ retry: return 0; } +/* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + /* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have @@ -2437,6 +2455,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; -- cgit v1.2.3 From 509955823cc9cc225c05673b1b83d70ca70c5c60 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Nov 2017 20:53:02 -0800 Subject: xfs: log recovery should replay deferred ops in order As part of testing log recovery with dm_log_writes, Amir Goldstein discovered an error in the deferred ops recovery that lead to corruption of the filesystem metadata if a reflink+rmap filesystem happened to shut down midway through a CoW remap: "This is what happens [after failed log recovery]: "Phase 1 - find and verify superblock... "Phase 2 - using internal log " - zero log... " - scan filesystem freespace and inode maps... " - found root inode chunk "Phase 3 - for each AG... " - scan (but don't clear) agi unlinked lists... " - process known inodes and perform inode discovery... " - agno = 0 "data fork in regular inode 134 claims CoW block 376 "correcting nextents for inode 134 "bad data fork in inode 134 "would have cleared inode 134" Hou Tao dissected the log contents of exactly such a crash: "According to the implementation of xfs_defer_finish(), these ops should be completed in the following sequence: "Have been done: "(1) CUI: Oper (160) "(2) BUI: Oper (161) "(3) CUD: Oper (194), for CUI Oper (160) "(4) RUI A: Oper (197), free rmap [0x155, 2, -9] "Should be done: "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI A "(8) RUD: for RUI B "Actually be done by xlog_recover_process_intents() "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI B "(8) RUD: for RUI A "So the rmap entry [0x155, 2, -9] for COW should be freed firstly, then a new rmap entry [0x155, 2, 137] will be added. However, as we can see from the log record in post_mount.log (generated after umount) and the trace print, the new rmap entry [0x155, 2, 137] are added firstly, then the rmap entry [0x155, 2, -9] are freed." When reconstructing the internal log state from the log items found on disk, it's required that deferred ops replay in exactly the same order that they would have had the filesystem not gone down. However, replaying unfinished deferred ops can create /more/ deferred ops. These new deferred ops are finished in the wrong order. This causes fs corruption and replay crashes, so let's create a single defer_ops to handle the subsequent ops created during replay, then use one single transaction at the end of log recovery to ensure that everything is replayed in the same order as they're supposed to be. Reported-by: Amir Goldstein Analyzed-by: Hou Tao Reviewed-by: Christoph Hellwig Tested-by: Amir Goldstein Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_item.c | 23 +++++--------- fs/xfs/xfs_bmap_item.h | 3 +- fs/xfs/xfs_log_recover.c | 75 +++++++++++++++++++++++++++++++++++++++++----- fs/xfs/xfs_refcount_item.c | 21 +++++-------- fs/xfs/xfs_refcount_item.h | 3 +- 5 files changed, 85 insertions(+), 40 deletions(-) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index dd136f7275e4..e5fb008d75e8 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -389,7 +389,8 @@ xfs_bud_init( int xfs_bui_recover( struct xfs_mount *mp, - struct xfs_bui_log_item *buip) + struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops) { int error = 0; unsigned int bui_type; @@ -404,9 +405,7 @@ xfs_bui_recover( xfs_exntst_t state; struct xfs_trans *tp; struct xfs_inode *ip = NULL; - struct xfs_defer_ops dfops; struct xfs_bmbt_irec irec; - xfs_fsblock_t firstfsb; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -464,7 +463,6 @@ xfs_bui_recover( if (VFS_I(ip)->i_nlink == 0) xfs_iflags_set(ip, XFS_IRECOVERY); - xfs_defer_init(&dfops, &firstfsb); /* Process deferred bmap item. */ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? @@ -479,16 +477,16 @@ xfs_bui_recover( break; default: error = -EFSCORRUPTED; - goto err_dfops; + goto err_inode; } xfs_trans_ijoin(tp, ip, 0); count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, + error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type, ip, whichfork, bmap->me_startoff, bmap->me_startblock, &count, state); if (error) - goto err_dfops; + goto err_inode; if (count > 0) { ASSERT(type == XFS_BMAP_UNMAP); @@ -496,16 +494,11 @@ xfs_bui_recover( irec.br_blockcount = count; irec.br_startoff = bmap->me_startoff; irec.br_state = state; - error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); + error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec); if (error) - goto err_dfops; + goto err_inode; } - /* Finish transaction, free inodes. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto err_dfops; - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -513,8 +506,6 @@ xfs_bui_recover( return error; -err_dfops: - xfs_defer_cancel(&dfops); err_inode: xfs_trans_cancel(tp); if (ip) { diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index c867daae4a3c..24b354a2c836 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); -int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip); +int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops); #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87b1c331f9eb..28d1abfe835e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -24,6 +24,7 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" @@ -4716,7 +4717,8 @@ STATIC int xlog_recover_process_cui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_cui_log_item *cuip; int error; @@ -4729,7 +4731,7 @@ xlog_recover_process_cui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_cui_recover(mp, cuip); + error = xfs_cui_recover(mp, cuip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4756,7 +4758,8 @@ STATIC int xlog_recover_process_bui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_bui_log_item *buip; int error; @@ -4769,7 +4772,7 @@ xlog_recover_process_bui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_bui_recover(mp, buip); + error = xfs_bui_recover(mp, buip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip) } } +/* Take all the collected deferred ops and finish them in order. */ +static int +xlog_finish_defer_ops( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops) +{ + struct xfs_trans *tp; + int64_t freeblks; + uint resblks; + int error; + + /* + * We're finishing the defer_ops that accumulated as a result of + * recovering unfinished intent items during log recovery. We + * reserve an itruncate transaction because it is the largest + * permanent transaction type. Since we're the only user of the fs + * right now, take 93% (15/16) of the available free blocks. Use + * weird math to avoid a 64-bit division. + */ + freeblks = percpu_counter_sum(&mp->m_fdblocks); + if (freeblks <= 0) + return -ENOSPC; + resblks = min_t(int64_t, UINT_MAX, freeblks); + resblks = (resblks * 15) >> 4; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, + 0, XFS_TRANS_RESERVE, &tp); + if (error) + return error; + + error = xfs_defer_finish(&tp, dfops); + if (error) + goto out_cancel; + + return xfs_trans_commit(tp); + +out_cancel: + xfs_trans_cancel(tp); + return error; +} + /* * When this is called, all of the log intent items which did not have * corresponding log done items should be in the AIL. What we do now @@ -4825,10 +4868,12 @@ STATIC int xlog_recover_process_intents( struct xlog *log) { - struct xfs_log_item *lip; - int error = 0; + struct xfs_defer_ops dfops; struct xfs_ail_cursor cur; + struct xfs_log_item *lip; struct xfs_ail *ailp; + xfs_fsblock_t firstfsb; + int error = 0; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; #endif @@ -4839,6 +4884,7 @@ xlog_recover_process_intents( #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif + xfs_defer_init(&dfops, &firstfsb); while (lip != NULL) { /* * We're done when we see something other than an intent. @@ -4859,6 +4905,12 @@ xlog_recover_process_intents( */ ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); + /* + * NOTE: If your intent processing routine can create more + * deferred ops, you /must/ attach them to the dfops in this + * routine or else those subsequent intents will get + * replayed in the wrong order! + */ switch (lip->li_type) { case XFS_LI_EFI: error = xlog_recover_process_efi(log->l_mp, ailp, lip); @@ -4867,10 +4919,12 @@ xlog_recover_process_intents( error = xlog_recover_process_rui(log->l_mp, ailp, lip); break; case XFS_LI_CUI: - error = xlog_recover_process_cui(log->l_mp, ailp, lip); + error = xlog_recover_process_cui(log->l_mp, ailp, lip, + &dfops); break; case XFS_LI_BUI: - error = xlog_recover_process_bui(log->l_mp, ailp, lip); + error = xlog_recover_process_bui(log->l_mp, ailp, lip, + &dfops); break; } if (error) @@ -4880,6 +4934,11 @@ xlog_recover_process_intents( out: xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); + if (error) + xfs_defer_cancel(&dfops); + else + error = xlog_finish_defer_ops(log->l_mp, &dfops); + return error; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 8f2e2fac4255..3a55d6fc271b 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -393,7 +393,8 @@ xfs_cud_init( int xfs_cui_recover( struct xfs_mount *mp, - struct xfs_cui_log_item *cuip) + struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops) { int i; int error = 0; @@ -405,11 +406,9 @@ xfs_cui_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; enum xfs_refcount_intent_type type; - xfs_fsblock_t firstfsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; struct xfs_bmbt_irec irec; - struct xfs_defer_ops dfops; bool requeue_only = false; ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); @@ -465,7 +464,6 @@ xfs_cui_recover( return error; cudp = xfs_trans_get_cud(tp, cuip); - xfs_defer_init(&dfops, &firstfsb); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { refc = &cuip->cui_format.cui_extents[i]; refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; @@ -485,7 +483,7 @@ xfs_cui_recover( new_len = refc->pe_len; } else error = xfs_trans_log_finish_refcount_update(tp, cudp, - &dfops, type, refc->pe_startblock, refc->pe_len, + dfops, type, refc->pe_startblock, refc->pe_len, &new_fsb, &new_len, &rcur); if (error) goto abort_error; @@ -497,21 +495,21 @@ xfs_cui_recover( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_increase_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_decrease_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_ALLOC_COW: error = xfs_refcount_alloc_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; case XFS_REFCOUNT_FREE_COW: error = xfs_refcount_free_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; @@ -525,17 +523,12 @@ xfs_cui_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); -abort_defer: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 5b74dddfa64b..0e5327349a13 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); -int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip); +int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops); #endif /* __XFS_REFCOUNT_ITEM_H__ */ -- cgit v1.2.3 From 6e0c9507bf51e1517a80ad0ac171e5402528fcef Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Nov 2017 12:28:17 +0100 Subject: i2c: i801: Fix Failed to allocate irq -2147483648 error On Apollo Lake devices the BIOS does not set up IRQ routing for the i801 SMBUS controller IRQ, so we end up with dev->irq set to IRQ_NOTCONNECTED. Detect this and do not try to use the irq in this case silencing: i801_smbus 0000:00:1f.1: Failed to allocate irq -2147483648: -107 Cc: stable@vger.kernel.org BugLink: https://communities.intel.com/thread/114759 Signed-off-by: Hans de Goede Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9e12a53ef7b8..8eac00efadc1 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1617,6 +1617,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Default timeout in interrupt mode: 200 ms */ priv->adapter.timeout = HZ / 5; + if (dev->irq == IRQ_NOTCONNECTED) + priv->features &= ~FEATURE_IRQ; + if (priv->features & FEATURE_IRQ) { u16 pcictl, pcists; -- cgit v1.2.3 From 66a7c84d677e8e4a5a2ef4afdb9bd52e1399a866 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Nov 2017 17:52:24 +0000 Subject: i2c: i2c-boardinfo: fix memory leaks on devinfo Currently when an error occurs devinfo is still allocated but is unused when the error exit paths break out of the for-loop. Fix this by kfree'ing devinfo to avoid the leak. Detected by CoverityScan, CID#1416590 ("Resource Leak") Fixes: 4124c4eba402 ("i2c: allow attaching IRQ resources to i2c_board_info") Fixes: 0daaf99d8424 ("i2c: copy device properties when using i2c_register_board_info()") Signed-off-by: Colin Ian King Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-boardinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 31186ead5a40..509a6007cdf6 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -86,6 +86,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig property_entries_dup(info->properties); if (IS_ERR(devinfo->board_info.properties)) { status = PTR_ERR(devinfo->board_info.properties); + kfree(devinfo); break; } } @@ -98,6 +99,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig GFP_KERNEL); if (!devinfo->board_info.resources) { status = -ENOMEM; + kfree(devinfo); break; } } -- cgit v1.2.3 From 2967acbb257a6a9bf912f4778b727e00972eac9b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 19 Nov 2017 11:52:55 -0700 Subject: blktrace: fix trace mutex deadlock A previous commit changed the locking around registration/cleanup, but direct callers of blk_trace_remove() were missed. This means that if we hit the error path in setup, we will deadlock on attempting to re-acquire the queue trace mutex. Fixes: 1f2cac107c59 ("blktrace: fix unlocked access to init/start-stop/teardown") Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c5987d4c5f23..987d9a9ae283 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -591,7 +591,7 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, return ret; if (copy_to_user(arg, &buts, sizeof(buts))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } return 0; @@ -637,7 +637,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, return ret; if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } -- cgit v1.2.3 From 5b5971df3bc2775107ddad164018a8a8db633b81 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Thu, 23 Nov 2017 15:18:35 +0100 Subject: xen-netfront: remove warning when unloading module v2: * Replace busy wait with wait_event()/wake_up_all() * Cannot garantee that at the time xennet_remove is called, the xen_netback state will not be XenbusStateClosed, so added a condition for that * There's a small chance for the xen_netback state is XenbusStateUnknown by the time the xen_netfront switches to Closed, so added a condition for that. When unloading module xen_netfront from guest, dmesg would output warning messages like below: [ 105.236836] xen:grant_table: WARNING: g.e. 0x903 still in use! [ 105.236839] deferring g.e. 0x903 (pfn 0x35805) This problem relies on netfront and netback being out of sync. By the time netfront revokes the g.e.'s netback didn't have enough time to free all of them, hence displaying the warnings on dmesg. The trick here is to make netfront to wait until netback frees all the g.e.'s and only then continue to cleanup for the module removal, and this is done by manipulating both device states. Signed-off-by: Eduardo Otubo Acked-by: Juergen Gross Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8b8689c6d887..391432e2725d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -87,6 +87,8 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); + struct netfront_stats { u64 packets; u64 bytes; @@ -2021,10 +2023,12 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: + wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: + wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2130,6 +2134,20 @@ static int xennet_remove(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { + xenbus_switch_state(dev, XenbusStateClosing); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosing); + + xenbus_switch_state(dev, XenbusStateClosed); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); + } + xennet_disconnect_backend(info); unregister_netdev(info->netdev); -- cgit v1.2.3 From 2e724dca7749223204bbae21745c0e3fc932700a Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 27 Nov 2017 20:13:39 +0100 Subject: tipc: eliminate access after delete in group_filter_msg() KASAN revealed another access after delete in group.c. This time it found that we read the header of a received message after the buffer has been released. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 12777cac638a..95fec2c057d6 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -497,6 +497,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, while ((skb = skb_peek(defq))) { hdr = buf_msg(skb); mtyp = msg_type(hdr); + blks = msg_blocks(hdr); deliver = true; ack = false; update = false; @@ -546,7 +547,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (!update) continue; - blks = msg_blocks(hdr); tipc_group_update_rcv_win(grp, blks, node, port, xmitq); } return; -- cgit v1.2.3 From a8f97366452ed491d13cf1e44241bc0b5740b1f0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 27 Nov 2017 06:21:25 +0300 Subject: mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d() Currently, we unconditionally make page table dirty in touch_pmd(). It may result in false-positive can_follow_write_pmd(). We may avoid the situation, if we would only make the page table entry dirty if caller asks for write access -- FOLL_WRITE. The patch also changes touch_pud() in the same way. Signed-off-by: Kirill A. Shutemov Cc: Michal Hocko Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86fe697e8bfb..0e7ded98d114 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -842,20 +842,15 @@ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd) + pmd_t *pmd, int flags) { pmd_t _pmd; - /* - * We should set the dirty bit only for FOLL_WRITE but for now - * the dirty bit in the pmd is meaningless. And if the dirty - * bit will become meaningful and we'll only set it with - * FOLL_WRITE, an atomic set_bit will be required on the pmd to - * set the young bit, instead of the current set_pmd_at. - */ - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); + _pmd = pmd_mkyoung(*pmd); + if (flags & FOLL_WRITE) + _pmd = pmd_mkdirty(_pmd); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, - pmd, _pmd, 1)) + pmd, _pmd, flags & FOLL_WRITE)) update_mmu_cache_pmd(vma, addr, pmd); } @@ -884,7 +879,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, return NULL; if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); /* * device mapped pages can only be returned if the @@ -995,20 +990,15 @@ out: #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static void touch_pud(struct vm_area_struct *vma, unsigned long addr, - pud_t *pud) + pud_t *pud, int flags) { pud_t _pud; - /* - * We should set the dirty bit only for FOLL_WRITE but for now - * the dirty bit in the pud is meaningless. And if the dirty - * bit will become meaningful and we'll only set it with - * FOLL_WRITE, an atomic set_bit will be required on the pud to - * set the young bit, instead of the current set_pud_at. - */ - _pud = pud_mkyoung(pud_mkdirty(*pud)); + _pud = pud_mkyoung(*pud); + if (flags & FOLL_WRITE) + _pud = pud_mkdirty(_pud); if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, - pud, _pud, 1)) + pud, _pud, flags & FOLL_WRITE)) update_mmu_cache_pud(vma, addr, pud); } @@ -1031,7 +1021,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, return NULL; if (flags & FOLL_TOUCH) - touch_pud(vma, addr, pud); + touch_pud(vma, addr, pud, flags); /* * device mapped pages can only be returned if the @@ -1424,7 +1414,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, page = pmd_page(*pmd); VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * We don't mlock() pte-mapped THPs. This way we can avoid -- cgit v1.2.3 From 152e93af3cfe2d29d8136cc0a02a8612507136ee Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 27 Nov 2017 06:21:26 +0300 Subject: mm, thp: Do not make pmd/pud dirty without a reason Currently we make page table entries dirty all the time regardless of access type and don't even consider if the mapping is write-protected. The reasoning is that we don't really need dirty tracking on THP and making the entry dirty upfront may save some time on first write to the page. Unfortunately, such approach may result in false-positive can_follow_write_pmd() for huge zero page or read-only shmem file. Let's only make page dirty only if we about to write to the page anyway (as we do for small pages). I've restructured the code to make entry dirty inside maybe_p[mu]d_mkwrite(). It also takes into account if the vma is write-protected. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 31 +++++++++++++++++++------------ mm/internal.h | 3 ++- mm/khugepaged.c | 2 +- mm/memory.c | 2 +- mm/migrate.c | 2 +- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0e7ded98d114..f22401fd83b5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -474,10 +474,13 @@ out: } __setup("transparent_hugepage=", setup_transparent_hugepage); -pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) +pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty) { - if (likely(vma->vm_flags & VM_WRITE)) + if (likely(vma->vm_flags & VM_WRITE)) { pmd = pmd_mkwrite(pmd); + if (dirty) + pmd = pmd_mkdirty(pmd); + } return pmd; } @@ -599,7 +602,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } entry = mk_huge_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); page_add_new_anon_rmap(page, vma, haddr, true); mem_cgroup_commit_charge(page, memcg, false, true); lru_cache_add_active_or_unevictable(page, vma); @@ -741,8 +744,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); if (write) { - entry = pmd_mkyoung(pmd_mkdirty(entry)); - entry = maybe_pmd_mkwrite(entry, vma); + entry = pmd_mkyoung(entry); + entry = maybe_pmd_mkwrite(entry, vma, true); } if (pgtable) { @@ -788,10 +791,14 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) +static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma, + bool dirty) { - if (likely(vma->vm_flags & VM_WRITE)) + if (likely(vma->vm_flags & VM_WRITE)) { pud = pud_mkwrite(pud); + if (dirty) + pud = pud_mkdirty(pud); + } return pud; } @@ -807,8 +814,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); if (write) { - entry = pud_mkyoung(pud_mkdirty(entry)); - entry = maybe_pud_mkwrite(entry, vma); + entry = pud_mkyoung(entry); + entry = maybe_pud_mkwrite(entry, vma, true); } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); @@ -1279,7 +1286,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) if (reuse_swap_page(page, NULL)) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); ret |= VM_FAULT_WRITE; @@ -1349,7 +1356,7 @@ alloc: } else { pmd_t entry; entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd); page_add_new_anon_rmap(new_page, vma, haddr, true); mem_cgroup_commit_charge(new_page, memcg, false, true); @@ -2928,7 +2935,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = pmd_mksoft_dirty(pmde); if (is_write_migration_entry(entry)) - pmde = maybe_pmd_mkwrite(pmde, vma); + pmde = maybe_pmd_mkwrite(pmde, vma, false); flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); page_add_anon_rmap(new, vma, mmun_start, true); diff --git a/mm/internal.h b/mm/internal.h index e6bd35182dae..b35cdebda0ce 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -328,7 +328,8 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) } } -extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); +extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, + bool dirty); /* * At what user virtual address is page expected in @vma? diff --git a/mm/khugepaged.c b/mm/khugepaged.c index ea4ff259b671..db43dc8a8ae6 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1057,7 +1057,7 @@ static void collapse_huge_page(struct mm_struct *mm, pgtable = pmd_pgtable(_pmd); _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); - _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); + _pmd = maybe_pmd_mkwrite(_pmd, vma, false); /* * spin_lock() below is not the equivalent of smp_wmb(), so diff --git a/mm/memory.c b/mm/memory.c index 85e7a87da79f..b10c1d26f675 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3335,7 +3335,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) entry = mk_huge_pmd(page, vma->vm_page_prot); if (write) - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, true); add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); page_add_file_rmap(page, true); diff --git a/mm/migrate.c b/mm/migrate.c index 4d0be47a322a..57865fc8cfe3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2068,7 +2068,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, } entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = maybe_pmd_mkwrite(entry, vma, false); /* * Clear the old entry under pagetable lock and establish the new PTE. -- cgit v1.2.3 From 141cbfba1d0502006463aa80f57c64086226af1a Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 10 Aug 2017 10:53:53 +0200 Subject: auxdisplay: img-ascii-lcd: Only build on archs that have IOMEM This avoids the MODPOST error: ERROR: "devm_ioremap_resource" [drivers/auxdisplay/img-ascii-lcd.ko] undefined! Signed-off-by: Thomas Meyer Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/auxdisplay/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index d7d21118d3e0..2c2ed9cf8796 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -136,6 +136,7 @@ config CFAG12864B_RATE config IMG_ASCII_LCD tristate "Imagination Technologies ASCII LCD Display" + depends on HAS_IOMEM default y if MIPS_MALTA || MIPS_SEAD3 select SYSCON help -- cgit v1.2.3 From 1751e8a6cb935e555fcdbcb9ab4f0446e322ca3e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 27 Nov 2017 13:05:09 -0800 Subject: Rename superblock flags (MS_xyz -> SB_xyz) This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/mtd/mtdsuper.c | 6 +-- drivers/staging/lustre/lustre/llite/file.c | 2 +- drivers/staging/lustre/lustre/llite/llite_lib.c | 14 +++---- fs/9p/vfs_super.c | 6 +-- fs/adfs/super.c | 4 +- fs/affs/amigaffs.c | 2 +- fs/affs/bitmap.c | 6 +-- fs/affs/super.c | 16 ++++---- fs/afs/super.c | 4 +- fs/befs/ChangeLog | 2 +- fs/befs/linuxvfs.c | 4 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent_io.c | 2 +- fs/btrfs/ioctl.c | 4 +- fs/btrfs/super.c | 50 ++++++++++++------------ fs/btrfs/volumes.c | 4 +- fs/ceph/super.c | 8 ++-- fs/cifs/cifs_fs_sb.h | 2 +- fs/cifs/cifsfs.c | 12 +++--- fs/cifs/cifsglob.h | 4 +- fs/cifs/inode.c | 2 +- fs/cifs/xattr.c | 8 ++-- fs/coda/inode.c | 4 +- fs/cramfs/inode.c | 4 +- fs/ecryptfs/main.c | 8 ++-- fs/efs/super.c | 4 +- fs/ext2/balloc.c | 4 +- fs/ext2/ialloc.c | 4 +- fs/ext2/super.c | 20 +++++----- fs/ext4/inode.c | 4 +- fs/ext4/super.c | 52 ++++++++++++------------- fs/f2fs/checkpoint.c | 10 ++--- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/recovery.c | 10 ++--- fs/f2fs/super.c | 28 ++++++------- fs/fat/fatent.c | 6 +-- fs/fat/inode.c | 8 ++-- fs/fat/misc.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/freevxfs/vxfs_super.c | 4 +- fs/fs-writeback.c | 2 +- fs/fuse/inode.c | 12 +++--- fs/gfs2/ops_fstype.c | 16 ++++---- fs/gfs2/super.c | 10 ++--- fs/gfs2/trans.c | 2 +- fs/hfs/mdb.c | 4 +- fs/hfs/super.c | 16 ++++---- fs/hfsplus/super.c | 22 +++++------ fs/hpfs/map.c | 2 +- fs/hpfs/super.c | 8 ++-- fs/inode.c | 10 ++--- fs/isofs/inode.c | 2 +- fs/jffs2/fs.c | 4 +- fs/jffs2/os-linux.h | 2 +- fs/jffs2/super.c | 4 +- fs/jfs/super.c | 10 ++--- fs/kernfs/mount.c | 2 +- fs/libfs.c | 6 +-- fs/locks.c | 2 +- fs/minix/inode.c | 4 +- fs/ncpfs/inode.c | 4 +- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 2 +- fs/nfs/super.c | 22 +++++------ fs/nilfs2/segment.c | 2 +- fs/nilfs2/super.c | 24 ++++++------ fs/nilfs2/the_nilfs.c | 6 +-- fs/notify/fsnotify.c | 2 +- fs/nsfs.c | 2 +- fs/ntfs/super.c | 32 +++++++-------- fs/ocfs2/file.c | 2 +- fs/ocfs2/super.c | 28 ++++++------- fs/ocfs2/xattr.c | 2 +- fs/openpromfs/inode.c | 4 +- fs/orangefs/super.c | 8 ++-- fs/overlayfs/super.c | 10 ++--- fs/proc/inode.c | 2 +- fs/proc/root.c | 2 +- fs/proc_namespace.c | 8 ++-- fs/qnx4/inode.c | 4 +- fs/qnx6/inode.c | 4 +- fs/reiserfs/inode.c | 2 +- fs/reiserfs/journal.c | 6 +-- fs/reiserfs/prints.c | 4 +- fs/reiserfs/super.c | 18 ++++----- fs/reiserfs/xattr.c | 10 ++--- fs/romfs/super.c | 4 +- fs/squashfs/super.c | 4 +- fs/statfs.c | 6 +-- fs/sysfs/mount.c | 2 +- fs/sysv/inode.c | 2 +- fs/sysv/super.c | 2 +- fs/ubifs/file.c | 2 +- fs/ubifs/io.c | 2 +- fs/ubifs/super.c | 20 +++++----- fs/ubifs/ubifs.h | 4 +- fs/udf/super.c | 6 +-- fs/ufs/balloc.c | 8 ++-- fs/ufs/ialloc.c | 10 ++--- fs/ufs/super.c | 30 +++++++------- fs/xfs/xfs_log.c | 6 +-- fs/xfs/xfs_super.c | 8 ++-- fs/xfs/xfs_super.h | 2 +- include/linux/fs.h | 2 +- include/uapi/linux/bfs_fs.h | 2 +- ipc/mqueue.c | 2 +- mm/shmem.c | 10 ++--- security/apparmor/apparmorfs.c | 2 +- security/apparmor/include/lib.h | 2 +- 111 files changed, 417 insertions(+), 417 deletions(-) diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index e43fea896d1e..d58a61c09304 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -79,14 +79,14 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, pr_debug("MTDSB: New superblock for device %d (\"%s\")\n", mtd->index, mtd->name); - ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + ret = fill_super(sb, data, flags & SB_SILENT ? 1 : 0); if (ret < 0) { deactivate_locked_super(sb); return ERR_PTR(ret); } /* go */ - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; return dget(sb->s_root); /* new mountpoint for an already mounted superblock */ @@ -202,7 +202,7 @@ struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, not_an_MTD_device: #endif /* CONFIG_BLOCK */ - if (!(flags & MS_SILENT)) + if (!(flags & SB_SILENT)) printk(KERN_NOTICE "MTD: Attempt to mount non-MTD device \"%s\"\n", dev_name); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 2d6e64dea266..938b859b6650 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -1016,7 +1016,7 @@ static bool file_is_noatime(const struct file *file) if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) return true; - if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) return true; return false; diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 65ac5128f005..8666f1e81ade 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -313,11 +313,11 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, } if (data->ocd_connect_flags & OBD_CONNECT_ACL) { - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; sbi->ll_flags |= LL_SBI_ACL; } else { LCONSOLE_INFO("client wants to enable acl, but mdt not!\n"); - sb->s_flags &= ~MS_POSIXACL; + sb->s_flags &= ~SB_POSIXACL; sbi->ll_flags &= ~LL_SBI_ACL; } @@ -660,7 +660,7 @@ void ll_kill_super(struct super_block *sb) struct ll_sb_info *sbi; /* not init sb ?*/ - if (!(sb->s_flags & MS_ACTIVE)) + if (!(sb->s_flags & SB_ACTIVE)) return; sbi = ll_s2sbi(sb); @@ -2039,8 +2039,8 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) int err; __u32 read_only; - if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { - read_only = *flags & MS_RDONLY; + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { + read_only = *flags & SB_RDONLY; err = obd_set_info_async(NULL, sbi->ll_md_exp, sizeof(KEY_READ_ONLY), KEY_READ_ONLY, sizeof(read_only), @@ -2053,9 +2053,9 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) } if (read_only) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; else - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; if (sbi->ll_flags & LL_SBI_VERBOSE) LCONSOLE_WARN("Remounted %s %s\n", profilenm, diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8b75463cb211..af03c2a901eb 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -94,13 +94,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; - sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; + sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME; if (!v9ses->cache) - sb->s_flags |= MS_SYNCHRONOUS; + sb->s_flags |= SB_SYNCHRONOUS; #ifdef CONFIG_9P_FS_POSIX_ACL if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif return 0; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index c9fdfb112933..cfda2c7caedc 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -213,7 +213,7 @@ static int parse_options(struct super_block *sb, char *options) static int adfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; return parse_options(sb, data); } @@ -372,7 +372,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) struct inode *root; int ret = -EINVAL; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; asb = kzalloc(sizeof(*asb), GFP_KERNEL); if (!asb) diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 185d5ab7e986..0f0e6925e97d 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -453,7 +453,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...) pr_crit("error (device %s): %s(): %pV\n", sb->s_id, function, &vaf); if (!sb_rdonly(sb)) pr_warn("Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; va_end(args); } diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 2b1399611d9e..5ba9ef2742f6 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -250,12 +250,12 @@ int affs_init_bitmap(struct super_block *sb, int *flags) int i, res = 0; struct affs_sb_info *sbi = AFFS_SB(sb); - if (*flags & MS_RDONLY) + if (*flags & SB_RDONLY) return 0; if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) { pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -288,7 +288,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) if (affs_checksum_block(sb, bh)) { pr_warn("Bitmap %u invalid - mounting %s read only.\n", bm->bm_key, sb->s_id); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; goto out; } pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key); diff --git a/fs/affs/super.c b/fs/affs/super.c index 884bedab7266..1117e36134cc 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -356,7 +356,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = AFFS_SUPER_MAGIC; sb->s_op = &affs_sops; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL); if (!sbi) @@ -466,7 +466,7 @@ got_root: if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS || chksum == MUFS_DCOFS) && !sb_rdonly(sb)) { pr_notice("Dircache FS - mounting %s read only\n", sb->s_id); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } switch (chksum) { case MUFS_FS: @@ -488,7 +488,7 @@ got_root: /* fall thru */ case FS_OFS: affs_set_opt(sbi->s_flags, SF_OFS); - sb->s_flags |= MS_NOEXEC; + sb->s_flags |= SB_NOEXEC; break; case MUFS_DCOFS: case MUFS_INTLOFS: @@ -497,7 +497,7 @@ got_root: case FS_INTLOFS: affs_set_opt(sbi->s_flags, SF_INTL); affs_set_opt(sbi->s_flags, SF_OFS); - sb->s_flags |= MS_NOEXEC; + sb->s_flags |= SB_NOEXEC; break; default: pr_err("Unknown filesystem on device %s: %08X\n", @@ -513,7 +513,7 @@ got_root: sig, sig[3] + '0', blocksize); } - sb->s_flags |= MS_NODEV | MS_NOSUID; + sb->s_flags |= SB_NODEV | SB_NOSUID; sbi->s_data_blksize = sb->s_blocksize; if (affs_test_opt(sbi->s_flags, SF_OFS)) @@ -570,7 +570,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; memcpy(volume, sbi->s_volume, 32); if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, @@ -596,10 +596,10 @@ affs_remount(struct super_block *sb, int *flags, char *data) memcpy(sbi->s_volume, volume, 32); spin_unlock(&sbi->symlink_lock); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (*flags & MS_RDONLY) + if (*flags & SB_RDONLY) affs_free_bitmap(sb); else res = affs_init_bitmap(sb, flags); diff --git a/fs/afs/super.c b/fs/afs/super.c index 875b5eb02242..d3f97da61bdf 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -496,10 +496,10 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, if (ret < 0) goto error_sb; as = NULL; - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; } else { _debug("reuse"); - ASSERTCMP(sb->s_flags, &, MS_ACTIVE); + ASSERTCMP(sb->s_flags, &, SB_ACTIVE); afs_destroy_sbi(as); as = NULL; } diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog index 75a461cfaca6..16f2dfe8c2f7 100644 --- a/fs/befs/ChangeLog +++ b/fs/befs/ChangeLog @@ -365,7 +365,7 @@ Version 0.4 (2001-10-28) (fs/befs/super.c) * Tell the kernel to only mount befs read-only. - By setting the MS_RDONLY flag in befs_read_super(). + By setting the SB_RDONLY flag in befs_read_super(). Not that it was possible to write before. But now the kernel won't even try. (fs/befs/super.c) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index a92355cc453b..ee236231cafa 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -841,7 +841,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { befs_warning(sb, "No write support. Marking filesystem read-only"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* @@ -948,7 +948,7 @@ static int befs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if (!(*flags & MS_RDONLY)) + if (!(*flags & SB_RDONLY)) return -EINVAL; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f7df5536ab61..51477a537c83 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2957,7 +2957,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) */ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) { - return fs_info->sb->s_flags & MS_RDONLY || btrfs_fs_closing(fs_info); + return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info); } static inline void free_fs_info(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 16045ea86fc1..f9e9f721efe2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1984,7 +1984,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, struct btrfs_bio *bbio = NULL; int ret; - ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); + ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); BUG_ON(!mirror_num); bio = btrfs_io_bio_alloc(1); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd172a93d11a..d748ad1c3620 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1172,7 +1172,7 @@ again: if (!i_done || ret) goto out; - if (!(inode->i_sb->s_flags & MS_ACTIVE)) + if (!(inode->i_sb->s_flags & SB_ACTIVE)) goto out; /* @@ -1333,7 +1333,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * make sure we stop running if someone unmounts * the FS */ - if (!(inode->i_sb->s_flags & MS_ACTIVE)) + if (!(inode->i_sb->s_flags & SB_ACTIVE)) break; if (btrfs_defrag_cancelled(fs_info)) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 65af029559b5..305cae7444a0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -107,7 +107,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) return; if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; btrfs_info(fs_info, "forced readonly"); /* * Note that a running device replace operation is not @@ -137,7 +137,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function /* * Special case: if the error is EROFS, and we're already - * under MS_RDONLY, then it is safe here. + * under SB_RDONLY, then it is safe here. */ if (errno == -EROFS && sb_rdonly(sb)) return; @@ -168,7 +168,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); /* Don't go through full error handling during mount */ - if (sb->s_flags & MS_BORN) + if (sb->s_flags & SB_BORN) btrfs_handle_error(fs_info); } @@ -625,7 +625,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_acl: #ifdef CONFIG_BTRFS_FS_POSIX_ACL - info->sb->s_flags |= MS_POSIXACL; + info->sb->s_flags |= SB_POSIXACL; break; #else btrfs_err(info, "support for ACL not compiled in!"); @@ -633,7 +633,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, goto out; #endif case Opt_noacl: - info->sb->s_flags &= ~MS_POSIXACL; + info->sb->s_flags &= ~SB_POSIXACL; break; case Opt_notreelog: btrfs_set_and_info(info, NOTREELOG, @@ -851,7 +851,7 @@ check: /* * Extra check for current option against current flag */ - if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { + if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) { btrfs_err(info, "nologreplay must be used with ro mount option"); ret = -EINVAL; @@ -1147,7 +1147,7 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; #ifdef CONFIG_BTRFS_FS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif sb->s_flags |= SB_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; @@ -1180,7 +1180,7 @@ static int btrfs_fill_super(struct super_block *sb, } cleancache_init_fs(sb); - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; return 0; fail_close: @@ -1277,7 +1277,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",flushoncommit"); if (btrfs_test_opt(info, DISCARD)) seq_puts(seq, ",discard"); - if (!(info->sb->s_flags & MS_POSIXACL)) + if (!(info->sb->s_flags & SB_POSIXACL)) seq_puts(seq, ",noacl"); if (btrfs_test_opt(info, SPACE_CACHE)) seq_puts(seq, ",space_cache"); @@ -1409,11 +1409,11 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { - if (flags & MS_RDONLY) { - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, + if (flags & SB_RDONLY) { + mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY, device_name, newargs); } else { - mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, + mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY, device_name, newargs); if (IS_ERR(mnt)) { root = ERR_CAST(mnt); @@ -1565,7 +1565,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, u64 subvol_objectid = 0; int error = 0; - if (!(flags & MS_RDONLY)) + if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; error = btrfs_parse_early_options(data, mode, fs_type, @@ -1619,13 +1619,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (error) goto error_fs_info; - if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { + if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) { error = -EACCES; goto error_close_devices; } bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC, + s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC, fs_info); if (IS_ERR(s)) { error = PTR_ERR(s); @@ -1635,7 +1635,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (s->s_root) { btrfs_close_devices(fs_devices); free_fs_info(fs_info); - if ((flags ^ s->s_flags) & MS_RDONLY) + if ((flags ^ s->s_flags) & SB_RDONLY) error = -EBUSY; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); @@ -1702,11 +1702,11 @@ static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info, { if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || - (flags & MS_RDONLY))) { + (flags & SB_RDONLY))) { /* wait for any defraggers to finish */ wait_event(fs_info->transaction_wait, (atomic_read(&fs_info->defrag_running) == 0)); - if (flags & MS_RDONLY) + if (flags & SB_RDONLY) sync_filesystem(fs_info->sb); } } @@ -1766,10 +1766,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { /* * this also happens on 'umount -rf' or on shutdown, when * the filesystem is busy. @@ -1781,10 +1781,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) /* avoid complains from lockdep et al. */ up(&fs_info->uuid_tree_rescan_sem); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* - * Setting MS_RDONLY will put the cleaner thread to + * Setting SB_RDONLY will put the cleaner thread to * sleep at the next loop if it's already active. * If it's already asleep, we'll leave unused block * groups on disk until we're mounted read-write again @@ -1856,7 +1856,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) goto restore; } } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; set_bit(BTRFS_FS_OPEN, &fs_info->flags); } @@ -1866,9 +1866,9 @@ out: return 0; restore: - /* We've hit an error - don't reset MS_RDONLY */ + /* We've hit an error - don't reset SB_RDONLY */ if (sb_rdonly(sb)) - old_flags |= MS_RDONLY; + old_flags |= SB_RDONLY; sb->s_flags = old_flags; fs_info->mount_opt = old_opts; fs_info->compress_type = old_compress_type; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f1ecb938ba4d..925070b9ce03 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2384,7 +2384,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); if (seeding_dev) { - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; ret = btrfs_prepare_sprout(fs_info); if (ret) { btrfs_abort_transaction(trans, ret); @@ -2497,7 +2497,7 @@ error_sysfs: btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); error_trans: if (seeding_dev) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; if (trans) btrfs_end_transaction(trans); rcu_string_free(device->name); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fe9fbb3f13f7..a62d2a9841dc 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -331,11 +331,11 @@ static int parse_fsopt_token(char *c, void *private) break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: - fsopt->sb_flags |= MS_POSIXACL; + fsopt->sb_flags |= SB_POSIXACL; break; #endif case Opt_noacl: - fsopt->sb_flags &= ~MS_POSIXACL; + fsopt->sb_flags &= ~SB_POSIXACL; break; default: BUG_ON(token); @@ -520,7 +520,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",nopoolperm"); #ifdef CONFIG_CEPH_FS_POSIX_ACL - if (fsopt->sb_flags & MS_POSIXACL) + if (fsopt->sb_flags & SB_POSIXACL) seq_puts(m, ",acl"); else seq_puts(m, ",noacl"); @@ -988,7 +988,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, dout("ceph_mount\n"); #ifdef CONFIG_CEPH_FS_POSIX_ACL - flags |= MS_POSIXACL; + flags |= SB_POSIXACL; #endif err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); if (err < 0) { diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index cbd216b57239..350fa55a1bf7 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -42,7 +42,7 @@ #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ -#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ +#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of SB_POSIXACL in mnt_cifs_flags */ #define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */ #define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */ #define CIFS_MOUNT_MAP_SFM_CHR 0x800000 /* SFM/MAC mapping for illegal chars */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8c8b75d33f31..31b7565b1617 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -125,7 +125,7 @@ cifs_read_super(struct super_block *sb) tcon = cifs_sb_master_tcon(cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files) sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -497,7 +497,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",cifsacl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) seq_puts(s, ",dynperm"); - if (root->d_sb->s_flags & MS_POSIXACL) + if (root->d_sb->s_flags & SB_POSIXACL) seq_puts(s, ",acl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) seq_puts(s, ",mfsymlinks"); @@ -573,7 +573,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) static int cifs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; return 0; } @@ -708,7 +708,7 @@ cifs_do_mount(struct file_system_type *fs_type, rc = cifs_mount(cifs_sb, volume_info); if (rc) { - if (!(flags & MS_SILENT)) + if (!(flags & SB_SILENT)) cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", rc); root = ERR_PTR(rc); @@ -720,7 +720,7 @@ cifs_do_mount(struct file_system_type *fs_type, mnt_data.flags = flags; /* BB should we make this contingent on mount parm? */ - flags |= MS_NODIRATIME | MS_NOATIME; + flags |= SB_NODIRATIME | SB_NOATIME; sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data); if (IS_ERR(sb)) { @@ -739,7 +739,7 @@ cifs_do_mount(struct file_system_type *fs_type, goto out_super; } - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; } root = cifs_get_root(volume_info, sb); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e185b2853eab..b16583594d1a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -559,8 +559,8 @@ struct smb_vol { CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \ CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID) -#define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \ - MS_NODEV | MS_SYNCHRONOUS) +#define CIFS_MS_MASK (SB_RDONLY | SB_MANDLOCK | SB_NOEXEC | SB_NOSUID | \ + SB_NODEV | SB_SYNCHRONOUS) struct cifs_mnt_data { struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7c732cb44164..ecb99079363a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -985,7 +985,7 @@ retry_iget5_locked: } cifs_fattr_to_inode(inode, fattr); - if (sb->s_flags & MS_NOATIME) + if (sb->s_flags & SB_NOATIME) inode->i_flags |= S_NOATIME | S_NOCMTIME; if (inode->i_state & I_NEW) { inode->i_ino = hash; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 52f975d848a0..316af84674f1 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -117,7 +117,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, #ifdef CONFIG_CIFS_POSIX if (!value) goto out; - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, value, (const int)size, ACL_TYPE_ACCESS, cifs_sb->local_nls, @@ -129,7 +129,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, #ifdef CONFIG_CIFS_POSIX if (!value) goto out; - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, value, (const int)size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, @@ -266,7 +266,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, case XATTR_ACL_ACCESS: #ifdef CONFIG_CIFS_POSIX - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, value, size, ACL_TYPE_ACCESS, cifs_sb->local_nls, @@ -276,7 +276,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, case XATTR_ACL_DEFAULT: #ifdef CONFIG_CIFS_POSIX - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, value, size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6f0a6a4d5faa..97424cf206c0 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -96,7 +96,7 @@ void coda_destroy_inodecache(void) static int coda_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; return 0; } @@ -188,7 +188,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) mutex_unlock(&vc->vc_mutex); sb->s_fs_info = vc; - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; sb->s_blocksize = 4096; /* XXXXX what do we put here?? */ sb->s_blocksize_bits = 12; sb->s_magic = CODA_SUPER_MAGIC; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 9a2ab419ba62..017b0ab19bc4 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -505,7 +505,7 @@ static void cramfs_kill_sb(struct super_block *sb) static int cramfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -592,7 +592,7 @@ static int cramfs_finalize_super(struct super_block *sb, struct inode *root; /* Set it all up.. */ - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_op = &cramfs_ops; root = get_cramfs_inode(sb, cramfs_root, 0); if (IS_ERR(root)) diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index f2677c90d96e..025d66a705db 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -560,8 +560,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags * Set the POSIX ACL flag based on whether they're enabled in the lower * mount. */ - s->s_flags = flags & ~MS_POSIXACL; - s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL; + s->s_flags = flags & ~SB_POSIXACL; + s->s_flags |= path.dentry->d_sb->s_flags & SB_POSIXACL; /** * Force a read-only eCryptfs mount when: @@ -569,7 +569,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags * 2) The ecryptfs_encrypted_view mount option is specified */ if (sb_rdonly(path.dentry->d_sb) || mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; @@ -602,7 +602,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags ecryptfs_set_dentry_private(s->s_root, root_info); root_info->lower_path = path; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; return dget(s->s_root); out_free: diff --git a/fs/efs/super.c b/fs/efs/super.c index 65b59009555b..6ffb7ba1547a 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -116,7 +116,7 @@ static void destroy_inodecache(void) static int efs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -311,7 +311,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) #ifdef DEBUG pr_info("forcing read-only mode\n"); #endif - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; } s->s_op = &efs_superblock_operations; s->s_export_op = &efs_export_ops; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index e1b3724bebf2..33db13365c5e 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -548,7 +548,7 @@ do_more: } mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); group_adjust_blocks(sb, block_group, desc, bh2, group_freed); @@ -1424,7 +1424,7 @@ allocated: percpu_counter_sub(&sbi->s_freeblocks_counter, num); mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); *errp = 0; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index a1fc3dabca41..6484199b35d1 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -145,7 +145,7 @@ void ext2_free_inode (struct inode * inode) else ext2_release_inode(sb, block_group, is_directory); mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); brelse(bitmap_bh); @@ -517,7 +517,7 @@ repeat_in_this_group: goto fail; got: mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); brelse(bitmap_bh); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e2b6be03e69b..7646818ab266 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -75,7 +75,7 @@ void ext2_error(struct super_block *sb, const char *function, if (test_opt(sb, ERRORS_RO)) { ext2_msg(sb, KERN_CRIT, "error: remounting filesystem read-only"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } } @@ -656,7 +656,7 @@ static int ext2_setup_super (struct super_block * sb, ext2_msg(sb, KERN_ERR, "error: revision level too high, " "forcing read-only mode"); - res = MS_RDONLY; + res = SB_RDONLY; } if (read_only) return res; @@ -924,9 +924,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_resuid = opts.s_resuid; sbi->s_resgid = opts.s_resgid; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? - MS_POSIXACL : 0); + SB_POSIXACL : 0); sb->s_iflags |= SB_I_CGROUPWB; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && @@ -1178,7 +1178,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_msg(sb, KERN_WARNING, "warning: mounting ext3 filesystem as ext2"); if (ext2_setup_super (sb, es, sb_rdonly(sb))) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ext2_write_super(sb); return 0; @@ -1341,9 +1341,9 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) "dax flag with busy inodes while remounting"); new_opts.s_mount_opt ^= EXT2_MOUNT_DAX; } - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out_set; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || !(sbi->s_mount_state & EXT2_VALID_FS)) goto out_set; @@ -1379,7 +1379,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) */ sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext2_setup_super (sb, es, 0)) - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; spin_unlock(&sbi->s_lock); ext2_write_super(sb); @@ -1392,8 +1392,8 @@ out_set: sbi->s_mount_opt = new_opts.s_mount_opt; sbi->s_resuid = new_opts.s_resuid; sbi->s_resgid = new_opts.s_resgid; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); spin_unlock(&sbi->s_lock); return 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0992d76f7ab1..7df2c5644e59 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2742,7 +2742,7 @@ static int ext4_writepages(struct address_space *mapping, * If the filesystem has aborted, it is read-only, so return * right away instead of dumping stack traces later on that * will obscure the real source of the problem. We test - * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because + * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because * the latter could be true if the filesystem is mounted * read-only, and in that case, ext4_writepages should * *never* be called, so if that ever happens, we would want @@ -5183,7 +5183,7 @@ static int ext4_do_update_inode(handle_t *handle, ext4_inode_csum_set(inode, raw_inode, ei); spin_unlock(&ei->i_raw_lock); - if (inode->i_sb->s_flags & MS_LAZYTIME) + if (inode->i_sb->s_flags & SB_LAZYTIME) ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, bh->b_data); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0556cd036b69..7c46693a14d7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -422,7 +422,7 @@ static void ext4_handle_error(struct super_block *sb) * before ->s_flags update */ smp_wmb(); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } if (test_opt(sb, ERRORS_PANIC)) { if (EXT4_SB(sb)->s_journal && @@ -635,7 +635,7 @@ void __ext4_abort(struct super_block *sb, const char *function, * before ->s_flags update */ smp_wmb(); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; if (EXT4_SB(sb)->s_journal) jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); @@ -1682,10 +1682,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, sb->s_flags |= SB_I_VERSION; return 1; case Opt_lazytime: - sb->s_flags |= MS_LAZYTIME; + sb->s_flags |= SB_LAZYTIME; return 1; case Opt_nolazytime: - sb->s_flags &= ~MS_LAZYTIME; + sb->s_flags &= ~SB_LAZYTIME; return 1; } @@ -2116,7 +2116,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { ext4_msg(sb, KERN_ERR, "revision level too high, " "forcing read-only mode"); - res = MS_RDONLY; + res = SB_RDONLY; } if (read_only) goto done; @@ -2429,7 +2429,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { /* don't clear list on RO mount w/ errors */ - if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { + if (es->s_last_orphan && !(s_flags & SB_RDONLY)) { ext4_msg(sb, KERN_INFO, "Errors on filesystem, " "clearing orphan list.\n"); es->s_last_orphan = 0; @@ -2438,19 +2438,19 @@ static void ext4_orphan_cleanup(struct super_block *sb, return; } - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; } #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; /* * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. */ - if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) { + if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) { int ret = ext4_enable_quotas(sb); if (!ret) @@ -2539,7 +2539,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } } #endif - sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sb->s_flags = s_flags; /* Restore SB_RDONLY status */ } /* @@ -2741,7 +2741,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) if (ext4_has_feature_readonly(sb)) { ext4_msg(sb, KERN_INFO, "filesystem is read-only"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; return 1; } @@ -3623,8 +3623,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_iflags |= SB_I_CGROUPWB; } - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && (ext4_has_compat_features(sb) || @@ -4199,7 +4199,7 @@ no_journal: } if (ext4_setup_super(sb, es, sb_rdonly(sb))) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* determine the minimum size of new large inodes, if present */ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && @@ -4693,7 +4693,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) * the clock is set in the future, and this will cause e2fsck * to complain and force a full file system check. */ - if (!(sb->s_flags & MS_RDONLY)) + if (!(sb->s_flags & SB_RDONLY)) es->s_wtime = cpu_to_le32(get_seconds()); if (sb->s_bdev->bd_part) es->s_kbytes_written = @@ -5047,8 +5047,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, "Abort forced by user"); - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); es = sbi->s_es; @@ -5057,16 +5057,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } - if (*flags & MS_LAZYTIME) - sb->s_flags |= MS_LAZYTIME; + if (*flags & SB_LAZYTIME) + sb->s_flags |= SB_LAZYTIME; - if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; goto restore_opts; } - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { err = sync_filesystem(sb); if (err < 0) goto restore_opts; @@ -5078,7 +5078,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * First of all, the unconditional stuff we have to do * to disable replay of the journal when we next remount */ - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* * OK, test if we are remounting a valid rw partition @@ -5140,7 +5140,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_clear_journal_err(sb, es); sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext4_setup_super(sb, es, 0)) - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; if (ext4_has_feature_mmp(sb)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) { @@ -5164,7 +5164,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } ext4_setup_system_zone(sb); - if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) + if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA @@ -5182,7 +5182,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } #endif - *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); + *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); return 0; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd2e73e10857..4aa69bc1c70a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -617,17 +617,17 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); - sbi->sb->s_flags &= ~MS_RDONLY; + sbi->sb->s_flags &= ~SB_RDONLY; } #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - sbi->sb->s_flags |= MS_ACTIVE; + sbi->sb->s_flags |= SB_ACTIVE; /* Turn on quotas so that they are updated correctly */ - quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); @@ -658,7 +658,7 @@ out: if (quota_enabled) f2fs_quota_off_umount(sbi->sb); #endif - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f4e094e816c6..6abf26c31d01 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2378,7 +2378,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) static inline int f2fs_readonly(struct super_block *sb) { - return sb->s_flags & MS_RDONLY; + return sb->s_flags & SB_RDONLY; } static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5d5bba462f26..d844dcb80570 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1005,7 +1005,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, cpc.reason = __get_cp_reason(sbi); gc_more: - if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { + if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) { ret = -EINVAL; goto stop; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 92c57ace1939..b3a14b0429f2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -598,16 +598,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int quota_enabled; #endif - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); - sbi->sb->s_flags &= ~MS_RDONLY; + sbi->sb->s_flags &= ~SB_RDONLY; } #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - sbi->sb->s_flags |= MS_ACTIVE; + sbi->sb->s_flags |= SB_ACTIVE; /* Turn on quotas so that they are updated correctly */ - quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); #endif fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", @@ -671,7 +671,7 @@ out: if (quota_enabled) f2fs_quota_off_umount(sbi->sb); #endif - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return ret ? ret: err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6c5dd450002..708155d9c2e4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -534,10 +534,10 @@ static int parse_options(struct super_block *sb, char *options) #endif break; case Opt_lazytime: - sb->s_flags |= MS_LAZYTIME; + sb->s_flags |= SB_LAZYTIME; break; case Opt_nolazytime: - sb->s_flags &= ~MS_LAZYTIME; + sb->s_flags &= ~SB_LAZYTIME; break; #ifdef CONFIG_QUOTA case Opt_quota: @@ -1168,7 +1168,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_DENTRY); set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); - sbi->sb->s_flags |= MS_LAZYTIME; + sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); if (f2fs_sb_mounted_blkzoned(sbi->sb)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); @@ -1236,7 +1236,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #endif /* recover superblocks we couldn't write due to previous RO mount */ - if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { + if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); f2fs_msg(sb, KERN_INFO, "Try to recover all the superblocks, ret: %d", err); @@ -1255,17 +1255,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. */ - if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) + if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) goto skip; #ifdef CONFIG_QUOTA - if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { + if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; } else { /* dquot_resume needs RW */ - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { dquot_resume(sb, -1); } else if (f2fs_sb_has_quota_ino(sb)) { @@ -1288,7 +1288,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) { if (sbi->gc_thread) { stop_gc_thread(sbi); need_restart_gc = true; @@ -1300,7 +1300,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1314,7 +1314,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ - if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); destroy_flush_cmd_control(sbi, false); } else { @@ -1329,8 +1329,8 @@ skip: kfree(s_qf_names[i]); #endif /* Update the POSIXACL Flag */ - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); return 0; restore_gc: @@ -2472,8 +2472,8 @@ try_onemore: sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; sb->s_time_gran = 1; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); /* init f2fs-specific super block info */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 48b2336692f9..bac10de678cc 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -392,7 +392,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); mark_buffer_dirty_inode(c_bh, sbi->fat_inode); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); if (err) @@ -597,7 +597,7 @@ int fat_free_clusters(struct inode *inode, int cluster) } if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { - if (sb->s_flags & MS_SYNCHRONOUS) { + if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; @@ -612,7 +612,7 @@ int fat_free_clusters(struct inode *inode, int cluster) fat_collect_bhs(bhs, &nr_bhs, &fatent); } while (cluster != FAT_ENT_EOF); - if (sb->s_flags & MS_SYNCHRONOUS) { + if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 30c52394a7ad..016c46b5e44c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -781,12 +781,12 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) { int new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); - *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME); sync_filesystem(sb); /* make sure we update state on remount. */ - new_rdonly = *flags & MS_RDONLY; + new_rdonly = *flags & SB_RDONLY; if (new_rdonly != sb_rdonly(sb)) { if (new_rdonly) fat_set_state(sb, 0, 0); @@ -1352,7 +1352,7 @@ out: if (opts->unicode_xlate) opts->utf8 = 0; if (opts->nfs == FAT_NFS_NOSTALE_RO) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_export_op = &fat_export_ops_nostale; } @@ -1608,7 +1608,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, return -ENOMEM; sb->s_fs_info = sbi; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; sb->s_magic = MSDOS_SUPER_MAGIC; sb->s_op = &fat_sops; sb->s_export_op = &fat_export_ops; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index acc3aa30ee54..f9bdc1e01c98 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -33,7 +33,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) if (opts->errors == FAT_ERRORS_PANIC) panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); else if (opts->errors == FAT_ERRORS_RO && !sb_rdonly(sb)) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; fat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); } } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 7d6a105d601b..d24d2758a363 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -646,7 +646,7 @@ static void setup(struct super_block *sb) { MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations; sb->s_d_op = &msdos_dentry_operations; - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; } static int msdos_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 455ce5b77e9b..f989efa051a0 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -116,7 +116,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) static int vxfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -220,7 +220,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) int ret = -EINVAL; u32 j; - sbp->s_flags |= MS_RDONLY; + sbp->s_flags |= SB_RDONLY; infp = kzalloc(sizeof(*infp), GFP_KERNEL); if (!infp) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 08f5debd07d1..cea4836385b7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -490,7 +490,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); - if (!(inode->i_sb->s_flags & MS_ACTIVE) || + if (!(inode->i_sb->s_flags & SB_ACTIVE) || inode->i_state & (I_WB_SWITCH | I_FREEING) || inode_to_wb(inode) == isw->new_wb) { spin_unlock(&inode->i_lock); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2f504d615d92..624f18bbfd2b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -130,7 +130,7 @@ static void fuse_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - if (inode->i_sb->s_flags & MS_ACTIVE) { + if (inode->i_sb->s_flags & SB_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); @@ -141,7 +141,7 @@ static void fuse_evict_inode(struct inode *inode) static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if (*flags & MS_MANDLOCK) + if (*flags & SB_MANDLOCK) return -EINVAL; return 0; @@ -1056,10 +1056,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) int is_bdev = sb->s_bdev != NULL; err = -EINVAL; - if (sb->s_flags & MS_MANDLOCK) + if (sb->s_flags & SB_MANDLOCK) goto err; - sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION); + sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); if (!parse_fuse_opt(data, &d, is_bdev)) goto err; @@ -1109,9 +1109,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_dev_free; /* Handle umasking inside the fuse code */ - if (sb->s_flags & MS_POSIXACL) + if (sb->s_flags & SB_POSIXACL) fc->dont_mask = 1; - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; fc->default_permissions = d.default_permissions; fc->allow_other = d.allow_other; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a3711f543405..ad55eb86a250 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1065,15 +1065,15 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sdp->sd_args = *args; if (sdp->sd_args.ar_spectator) { - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; set_bit(SDF_RORECOVERY, &sdp->sd_flags); } if (sdp->sd_args.ar_posix_acl) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; if (sdp->sd_args.ar_nobarrier) set_bit(SDF_NOBARRIERS, &sdp->sd_flags); - sb->s_flags |= MS_NOSEC; + sb->s_flags |= SB_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; sb->s_d_op = &gfs2_dops; @@ -1257,7 +1257,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, struct gfs2_args args; struct gfs2_sbd *sdp; - if (!(flags & MS_RDONLY)) + if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; bdev = blkdev_get_by_path(dev_name, mode, fs_type); @@ -1313,15 +1313,15 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (s->s_root) { error = -EBUSY; - if ((flags ^ s->s_flags) & MS_RDONLY) + if ((flags ^ s->s_flags) & SB_RDONLY) goto error_super; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); + error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0); if (error) goto error_super; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; bdev->bd_super = s; } @@ -1365,7 +1365,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, pr_warn("gfs2 mount does not exist\n"); return ERR_CAST(s); } - if ((flags ^ s->s_flags) & MS_RDONLY) { + if ((flags ^ s->s_flags) & SB_RDONLY) { deactivate_locked_super(s); return ERR_PTR(-EBUSY); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9cb5c9a97d69..d81d46e19726 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1256,10 +1256,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) return -EINVAL; if (sdp->sd_args.ar_spectator) - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; - if ((sb->s_flags ^ *flags) & MS_RDONLY) { - if (*flags & MS_RDONLY) + if ((sb->s_flags ^ *flags) & SB_RDONLY) { + if (*flags & SB_RDONLY) error = gfs2_make_fs_ro(sdp); else error = gfs2_make_fs_rw(sdp); @@ -1269,9 +1269,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sdp->sd_args = args; if (sdp->sd_args.ar_posix_acl) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; else - sb->s_flags &= ~MS_POSIXACL; + sb->s_flags &= ~SB_POSIXACL; if (sdp->sd_args.ar_nobarrier) set_bit(SDF_NOBARRIERS, &sdp->sd_flags); else diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index a85ca8b2c9ba..ca8b72d0a831 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -117,7 +117,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) kfree(tr); up_read(&sdp->sd_log_flush_lock); - if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) + if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS) gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); if (alloced) sb_end_intwrite(sdp->sd_vfs); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 894994d2c885..460281b1299e 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -204,11 +204,11 @@ int hfs_mdb_get(struct super_block *sb) attrib = mdb->drAtrb; if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) { pr_warn("filesystem is marked locked, mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } if (!sb_rdonly(sb)) { /* Mark the volume uncleanly unmounted in case we crash */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 7e0d65e9586c..173876782f73 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -114,18 +114,18 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + *flags |= SB_NODIRATIME; + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (!(*flags & MS_RDONLY)) { + if (!(*flags & SB_RDONLY)) { if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) { pr_warn("filesystem is marked locked, leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } } return 0; @@ -407,7 +407,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &hfs_super_operations; sb->s_xattr = hfs_xattr_handlers; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= SB_NODIRATIME; mutex_init(&sbi->bitmap_lock); res = hfs_mdb_get(sb); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index e5bb2de2262a..1d458b716957 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -329,9 +329,9 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfsplus_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (!(*flags & MS_RDONLY)) { + if (!(*flags & SB_RDONLY)) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; int force = 0; @@ -340,20 +340,20 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } else if (force) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { pr_warn("filesystem is marked locked, leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { pr_warn("filesystem is marked journaled, leaving read-only.\n"); - sb->s_flags |= MS_RDONLY; - *flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; + *flags |= SB_RDONLY; } } return 0; @@ -455,16 +455,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { pr_warn("Filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { pr_warn("Filesystem is marked locked, mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !sb_rdonly(sb)) { pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } err = -EINVAL; diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index e0e60b148400..7c49f1ef0c85 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -288,7 +288,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno, goto bail; } if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) { - if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok; + if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & SB_RDONLY) goto ok; hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 1516fb4e28f4..c45a3b9b9ac7 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -78,7 +78,7 @@ void hpfs_error(struct super_block *s, const char *fmt, ...) else { pr_cont("; remounting read-only\n"); mark_dirty(s, 0); - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; } } else if (sb_rdonly(s)) pr_cont("; going on - but anything won't be destroyed because it's read-only\n"); @@ -457,7 +457,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) sync_filesystem(s); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; @@ -488,7 +488,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk; sbi->sb_err = errs; sbi->sb_timeshift = timeshift; - if (!(*flags & MS_RDONLY)) mark_dirty(s, 1); + if (!(*flags & SB_RDONLY)) mark_dirty(s, 1); hpfs_unlock(s); return 0; @@ -614,7 +614,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) goto bail4; } - s->s_flags |= MS_NOATIME; + s->s_flags |= SB_NOATIME; /* Fill superblock stuff */ s->s_magic = HPFS_SUPER_MAGIC; diff --git a/fs/inode.c b/fs/inode.c index fd401028a309..03102d6ef044 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -416,7 +416,7 @@ void inode_add_lru(struct inode *inode) { if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) && - !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) + !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE) inode_lru_list_add(inode); } @@ -595,7 +595,7 @@ static void dispose_list(struct list_head *head) * @sb: superblock to operate on * * Make sure that no inodes with zero refcount are retained. This is - * called by superblock shutdown after having MS_ACTIVE flag removed, + * called by superblock shutdown after having SB_ACTIVE flag removed, * so any inode reaching zero refcount during or after that call will * be immediately evicted. */ @@ -1492,7 +1492,7 @@ static void iput_final(struct inode *inode) else drop = generic_drop_inode(inode); - if (!drop && (sb->s_flags & MS_ACTIVE)) { + if (!drop && (sb->s_flags & SB_ACTIVE)) { inode_add_lru(inode); spin_unlock(&inode->i_lock); return; @@ -1644,7 +1644,7 @@ int generic_update_time(struct inode *inode, struct timespec *time, int flags) if (flags & S_MTIME) inode->i_mtime = *time; - if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION)) + if (!(inode->i_sb->s_flags & SB_LAZYTIME) || (flags & S_VERSION)) iflags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, iflags); return 0; @@ -1691,7 +1691,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode, if (IS_NOATIME(inode)) return false; - if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) return false; if (mnt->mnt_flags & MNT_NOATIME) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 447a24d77b89..bc258a4402f6 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -114,7 +114,7 @@ static void destroy_inodecache(void) static int isofs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if (!(*flags & MS_RDONLY)) + if (!(*flags & SB_RDONLY)) return -EROFS; return 0; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e96c6b05e43e..d8c274d39ddb 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -409,10 +409,10 @@ int jffs2_do_remount_fs(struct super_block *sb, int *flags, char *data) mutex_unlock(&c->alloc_sem); } - if (!(*flags & MS_RDONLY)) + if (!(*flags & SB_RDONLY)) jffs2_start_garbage_collect_thread(c); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; return 0; } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 824e61ede465..c2fbec19c616 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -59,7 +59,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) } -#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY) +#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & SB_RDONLY) #define SECTOR_ADDR(x) ( (((unsigned long)(x) / c->sector_size) * c->sector_size) ) #ifndef CONFIG_JFFS2_FS_WRITEBUFFER diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 153f1c6eb169..f60dee7faf03 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -301,10 +301,10 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &jffs2_super_operations; sb->s_export_op = &jffs2_export_ops; - sb->s_flags = sb->s_flags | MS_NOATIME; + sb->s_flags = sb->s_flags | SB_NOATIME; sb->s_xattr = jffs2_xattr_handlers; #ifdef CONFIG_JFFS2_FS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif ret = jffs2_do_fill_super(sb, data, silent); return ret; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 2f7b3af5b8b7..90373aebfdca 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -87,7 +87,7 @@ static void jfs_handle_error(struct super_block *sb) else if (sbi->flag & JFS_ERR_REMOUNT_RO) { jfs_err("ERROR: (device %s): remounting filesystem as read-only", sb->s_id); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* nothing is done for continue beyond marking the superblock dirty */ @@ -477,7 +477,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) return rc; } - if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { + if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) { /* * Invalidate any previously read metadata. fsck may have * changed the on-disk data since we mounted r/o @@ -488,12 +488,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) ret = jfs_mount_rw(sb, 1); /* mark the fs r/w for quota activity */ - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; dquot_resume(sb, -1); return ret; } - if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { + if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) { rc = dquot_suspend(sb, -1); if (rc < 0) return rc; @@ -545,7 +545,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sbi->flag = flag; #ifdef CONFIG_JFS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif if (newLVSize) { diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 95a7c88baed9..26dd9a50f383 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -335,7 +335,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, deactivate_locked_super(sb); return ERR_PTR(error); } - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; mutex_lock(&kernfs_mutex); list_add(&info->node, &root->supers); diff --git a/fs/libfs.c b/fs/libfs.c index 3aabe553fc45..7ff3cb904acd 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -246,7 +246,7 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); - s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, + s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER, &init_user_ns, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -277,7 +277,7 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_d_op = dops; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; return dget(s->s_root); Enomem: @@ -578,7 +578,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); - mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL); + mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); diff --git a/fs/locks.c b/fs/locks.c index 1bd71c4d663a..21b4dfa289ee 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -141,7 +141,7 @@ static inline bool is_remote_lock(struct file *filp) { - return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK)); + return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK)); } static bool lease_breaking(struct file_lock *fl) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index b6829d679643..72e308c3e66b 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -125,9 +125,9 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) sync_filesystem(sb); ms = sbi->s_ms; - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { if (ms->s_state & MINIX_VALID_FS || !(sbi->s_mount_state & MINIX_VALID_FS)) return 0; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 129f1937fa2c..41de88cdc053 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -103,7 +103,7 @@ static void destroy_inodecache(void) static int ncp_remount(struct super_block *sb, int *flags, char* data) { sync_filesystem(sb); - *flags |= MS_NODIRATIME; + *flags |= SB_NODIRATIME; return 0; } @@ -547,7 +547,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) else default_bufsize = 1024; - sb->s_flags |= MS_NODIRATIME; /* probably even noatime */ + sb->s_flags |= SB_NODIRATIME; /* probably even noatime */ sb->s_maxbytes = 0xFFFFFFFFU; sb->s_blocksize = 1024; /* Eh... Is this correct? */ sb->s_blocksize_bits = 10; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e51ae52ed14f..2f3f86726f5b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1256,7 +1256,7 @@ static int nfs_dentry_delete(const struct dentry *dentry) /* Unhash it, so that ->d_iput() would be called */ return 1; } - if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { + if (!(dentry->d_sb->s_flags & SB_ACTIVE)) { /* Unhash it, so that ancestors of killed async unlink * files will be cleaned up during umount */ return 1; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 38b93d54c02e..b992d2382ffa 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -752,7 +752,7 @@ int nfs_getattr(const struct path *path, struct kstat *stat, * Note that we only have to check the vfsmount flags here: * - NFS always sets S_NOATIME by so checking it would give a * bogus result - * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is + * - NFS never sets SB_NOATIME or SB_NODIRATIME so there is * no point in checking those. */ if ((path->mnt->mnt_flags & MNT_NOATIME) || diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5ab17fd4700a..8357ff69962f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -10,7 +10,7 @@ #include #include -#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) +#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 43cadb28db6e..29bacdc56f6a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -813,9 +813,9 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) */ seq_printf(m, "\n\topts:\t"); seq_puts(m, sb_rdonly(root->d_sb) ? "ro" : "rw"); - seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); - seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : ""); - seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + seq_puts(m, root->d_sb->s_flags & SB_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, root->d_sb->s_flags & SB_NOATIME ? ",noatime" : ""); + seq_puts(m, root->d_sb->s_flags & SB_NODIRATIME ? ",nodiratime" : ""); nfs_show_mount_options(m, nfss, 1); seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); @@ -2296,11 +2296,11 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) /* * noac is a special case. It implies -o sync, but that's not * necessarily reflected in the mtab options. do_remount_sb - * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the + * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the * remount options, so we have to explicitly reset it. */ if (data->flags & NFS_MOUNT_NOAC) - *flags |= MS_SYNCHRONOUS; + *flags |= SB_SYNCHRONOUS; /* compare new mount options with old ones */ error = nfs_compare_remount_data(nfss, data); @@ -2349,7 +2349,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; sb->s_time_gran = 1; sb->s_export_op = &nfs_export_ops; } @@ -2379,7 +2379,7 @@ static void nfs_clone_super(struct super_block *sb, /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; } nfs_initialise_sb(sb); @@ -2600,11 +2600,11 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; + sb_mntdata.mntflags |= SB_SYNCHRONOUS; if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) - if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; + if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS) + sb_mntdata.mntflags |= SB_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); @@ -2641,7 +2641,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, if (error) goto error_splat_root; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; out: return mntroot; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index f572538dcc4f..9f3ffba41533 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1979,7 +1979,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_inode_info *ii, *n; - int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE); + int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE); int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3ce20cd44a20..3073b646e1ba 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -141,7 +141,7 @@ void __nilfs_error(struct super_block *sb, const char *function, if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } } @@ -869,7 +869,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, /* FS independent flags */ #ifdef NILFS_ATIME_DISABLE - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; #endif nilfs_set_default_options(sb, sbp); @@ -1133,7 +1133,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) err = -EINVAL; goto restore_opts; } - sb->s_flags = (sb->s_flags & ~MS_POSIXACL); + sb->s_flags = (sb->s_flags & ~SB_POSIXACL); err = -EINVAL; @@ -1143,12 +1143,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { /* Shutting down log writer */ nilfs_detach_log_writer(sb); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* * Remounting a valid RW partition RDONLY, so set @@ -1178,7 +1178,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); @@ -1212,7 +1212,7 @@ static int nilfs_parse_snapshot_option(const char *option, const char *msg = NULL; int err; - if (!(sd->flags & MS_RDONLY)) { + if (!(sd->flags & SB_RDONLY)) { msg = "read-only option is not specified"; goto parse_error; } @@ -1286,7 +1286,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, struct dentry *root_dentry; int err, s_new = false; - if (!(flags & MS_RDONLY)) + if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); @@ -1327,14 +1327,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags, snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev); sb_set_blocksize(s, block_size(sd.bdev)); - err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (err) goto failed_super; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; } else if (!sd.cno) { if (nilfs_tree_is_busy(s->s_root)) { - if ((flags ^ s->s_flags) & MS_RDONLY) { + if ((flags ^ s->s_flags) & SB_RDONLY) { nilfs_msg(s, KERN_ERR, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index afebb5067cec..1a85317e83f0 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -220,7 +220,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) if (!valid_fs) { nilfs_msg(sb, KERN_WARNING, "mounting unchecked fs"); - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { nilfs_msg(sb, KERN_INFO, "recovery required for readonly filesystem"); nilfs_msg(sb, KERN_INFO, @@ -286,7 +286,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) if (valid_fs) goto skip_recovery; - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { __u64 features; if (nilfs_test_opt(nilfs, NORECOVERY)) { @@ -309,7 +309,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) err = -EROFS; goto failed_unload; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; } else if (nilfs_test_opt(nilfs, NORECOVERY)) { nilfs_msg(sb, KERN_ERR, "recovery cancelled because norecovery option was specified for a read/write mount"); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 81d8959b6aef..219b269c737e 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -67,7 +67,7 @@ void fsnotify_unmount_inodes(struct super_block *sb) /* * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually + * doing an __iget/iput with SB_ACTIVE clear would actually * evict all inodes with zero i_count from icache which is * unnecessarily violent and may in fact be illegal to do. */ diff --git a/fs/nsfs.c b/fs/nsfs.c index ef243e14b6eb..7c6f76d29f56 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -255,5 +255,5 @@ void __init nsfs_init(void) nsfs_mnt = kern_mount(&nsfs); if (IS_ERR(nsfs_mnt)) panic("can't set nsfs up\n"); - nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; + nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 3f70f041dbe9..bb7159f697f2 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -473,7 +473,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) #ifndef NTFS_RW /* For read-only compiled driver, enforce read-only flag. */ - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; #else /* NTFS_RW */ /* * For the read-write compiled driver, if we are remounting read-write, @@ -487,7 +487,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) * When remounting read-only, mark the volume clean if no volume errors * have occurred. */ - if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { + if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) { static const char *es = ". Cannot remount read-write."; /* Remounting read-write. */ @@ -548,7 +548,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) NVolSetErrors(vol); return -EROFS; } - } else if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { + } else if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) { /* Remounting read-only. */ if (!NVolErrors(vol)) { if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) @@ -1799,7 +1799,7 @@ static bool load_system_files(ntfs_volume *vol) es3); goto iput_mirr_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", !vol->mftmirr_ino ? es1 : es2, es3); } else @@ -1937,7 +1937,7 @@ get_ctx_vol_failed: es1, es2); goto iput_vol_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1974,7 +1974,7 @@ get_ctx_vol_failed: } goto iput_logfile_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2019,7 +2019,7 @@ get_ctx_vol_failed: es1, es2); goto iput_root_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2042,7 +2042,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* * Do not set NVolErrors() because ntfs_remount() might manage * to set the dirty flag in which case all would be well. @@ -2055,7 +2055,7 @@ get_ctx_vol_failed: * If (still) a read-write mount, set the NT4 compatibility flag on * newer NTFS version volumes. */ - if (!(sb->s_flags & MS_RDONLY) && (vol->major_ver > 1) && + if (!(sb->s_flags & SB_RDONLY) && (vol->major_ver > 1) && ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) { static const char *es1 = "Failed to set NT4 compatibility flag"; static const char *es2 = ". Run chkdsk."; @@ -2069,7 +2069,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } #endif @@ -2087,7 +2087,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ @@ -2128,7 +2128,7 @@ get_ctx_vol_failed: es1, es2); goto iput_quota_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2150,7 +2150,7 @@ get_ctx_vol_failed: goto iput_quota_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } /* @@ -2171,7 +2171,7 @@ get_ctx_vol_failed: es1, es2); goto iput_usnjrnl_err_out; } - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2194,7 +2194,7 @@ get_ctx_vol_failed: goto iput_usnjrnl_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ @@ -2728,7 +2728,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) lockdep_off(); ntfs_debug("Entering."); #ifndef NTFS_RW - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; #endif /* ! NTFS_RW */ /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */ sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index dc455d45a66a..a1d051055472 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -227,7 +227,7 @@ int ocfs2_should_update_atime(struct inode *inode, return 0; if ((inode->i_flags & S_NOATIME) || - ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) + ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))) return 0; /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 040bbb6a6e4b..80efa5699fb0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -675,9 +675,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) } /* We're going to/from readonly mode. */ - if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { /* Disable quota accounting before remounting RO */ - if (*flags & MS_RDONLY) { + if (*flags & SB_RDONLY) { ret = ocfs2_susp_quotas(osb, 0); if (ret < 0) goto out; @@ -691,8 +691,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto unlock_osb; } - if (*flags & MS_RDONLY) { - sb->s_flags |= MS_RDONLY; + if (*flags & SB_RDONLY) { + sb->s_flags |= SB_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; } else { if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { @@ -709,14 +709,14 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) ret = -EINVAL; goto unlock_osb; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; } trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags); unlock_osb: spin_unlock(&osb->osb_lock); /* Enable quota accounting after remounting RW */ - if (!ret && !(*flags & MS_RDONLY)) { + if (!ret && !(*flags & SB_RDONLY)) { if (sb_any_quota_suspended(sb)) ret = ocfs2_susp_quotas(osb, 1); else @@ -724,7 +724,7 @@ unlock_osb: if (ret < 0) { /* Return back changes... */ spin_lock(&osb->osb_lock); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; spin_unlock(&osb->osb_lock); goto out; @@ -744,9 +744,9 @@ unlock_osb: if (!ocfs2_is_hard_readonly(osb)) ocfs2_set_journal_params(osb); - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? - MS_POSIXACL : 0); + SB_POSIXACL : 0); } out: return ret; @@ -1057,10 +1057,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; - sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) | - ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + sb->s_flags = (sb->s_flags & ~(SB_POSIXACL | SB_NOSEC)) | + ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); - /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, + /* Hard readonly mode only if: bdev_read_only, SB_RDONLY, * heartbeat=none */ if (bdev_read_only(sb->s_bdev)) { if (!sb_rdonly(sb)) { @@ -2057,7 +2057,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb->s_xattr = ocfs2_xattr_handlers; sb->s_time_gran = 1; - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; /* this is needed to support O_LARGEFILE */ cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); @@ -2568,7 +2568,7 @@ static int ocfs2_handle_error(struct super_block *sb) return rv; pr_crit("OCFS2: File system is now read-only.\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; ocfs2_set_ro_flag(osb, 0); } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 5fdf269ba82e..c5898c59d411 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -901,7 +901,7 @@ static int ocfs2_xattr_list_entry(struct super_block *sb, case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: - if (!(sb->s_flags & MS_POSIXACL)) + if (!(sb->s_flags & SB_POSIXACL)) return 0; break; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 13215f26e321..2200662a9bf1 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -369,7 +369,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) static int openprom_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_NOATIME; + *flags |= SB_NOATIME; return 0; } @@ -386,7 +386,7 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) struct op_inode_info *oi; int ret; - s->s_flags |= MS_NOATIME; + s->s_flags |= SB_NOATIME; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = OPENPROM_SUPER_MAGIC; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 366750eef201..36f1390b5ed7 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -40,7 +40,7 @@ static int orangefs_show_options(struct seq_file *m, struct dentry *root) { struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(root->d_sb); - if (root->d_sb->s_flags & MS_POSIXACL) + if (root->d_sb->s_flags & SB_POSIXACL) seq_puts(m, ",acl"); if (orangefs_sb->flags & ORANGEFS_OPT_INTR) seq_puts(m, ",intr"); @@ -60,7 +60,7 @@ static int parse_mount_options(struct super_block *sb, char *options, * Force any potential flags that might be set from the mount * to zero, ie, initialize to unset. */ - sb->s_flags &= ~MS_POSIXACL; + sb->s_flags &= ~SB_POSIXACL; orangefs_sb->flags &= ~ORANGEFS_OPT_INTR; orangefs_sb->flags &= ~ORANGEFS_OPT_LOCAL_LOCK; @@ -73,7 +73,7 @@ static int parse_mount_options(struct super_block *sb, char *options, token = match_token(p, tokens, args); switch (token) { case Opt_acl: - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; break; case Opt_intr: orangefs_sb->flags |= ORANGEFS_OPT_INTR; @@ -507,7 +507,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, ret = orangefs_fill_sb(sb, &new_op->downcall.resp.fs_mount, data, - flags & MS_SILENT ? 1 : 0); + flags & SB_SILENT ? 1 : 0); if (ret) { d = ERR_PTR(ret); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index be03578181d2..288d20f9a55a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -326,7 +326,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ofs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && ovl_force_readonly(ofs)) + if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) return -EROFS; return 0; @@ -1190,7 +1190,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_err; if (!ofs->workdir) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; @@ -1203,7 +1203,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ofs->upper_mnt) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; else if (ofs->upper_mnt->mnt_sb != ofs->same_sb) ofs->same_sb = NULL; @@ -1213,7 +1213,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_free_oe; if (!ofs->indexdir) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* Show index=off/on in /proc/mounts for any of the reasons above */ @@ -1227,7 +1227,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ovl_super_operations; sb->s_xattr = ovl_xattr_handlers; sb->s_fs_info = ofs; - sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; + sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK; err = -ENOMEM; root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 225f541f7078..dd0f82622427 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -483,7 +483,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent) /* User space would break if executables or devices appear on proc */ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; - s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; + s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = PROC_SUPER_MAGIC; diff --git a/fs/proc/root.c b/fs/proc/root.c index 4e42aba97f2e..ede8e64974be 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -91,7 +91,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, { struct pid_namespace *ns; - if (flags & MS_KERNMOUNT) { + if (flags & SB_KERNMOUNT) { ns = data; data = NULL; } else { diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 7b635d173213..b786840facd9 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -45,10 +45,10 @@ struct proc_fs_info { static int show_sb_opts(struct seq_file *m, struct super_block *sb) { static const struct proc_fs_info fs_info[] = { - { MS_SYNCHRONOUS, ",sync" }, - { MS_DIRSYNC, ",dirsync" }, - { MS_MANDLOCK, ",mand" }, - { MS_LAZYTIME, ",lazytime" }, + { SB_SYNCHRONOUS, ",sync" }, + { SB_DIRSYNC, ",dirsync" }, + { SB_MANDLOCK, ",mand" }, + { SB_LAZYTIME, ",lazytime" }, { 0, NULL } }; const struct proc_fs_info *fs_infop; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 3a67cfb142d8..3d46fe302fcb 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -47,7 +47,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) sync_filesystem(sb); qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -199,7 +199,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) s->s_op = &qnx4_sops; s->s_magic = QNX4_SUPER_MAGIC; - s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ + s->s_flags |= SB_RDONLY; /* Yup, read-only yet */ /* Check the superblock signature. Since the qnx4 code is dangerous, we should leave as quickly as possible diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 1192422a1c56..4aeb26bcb4d0 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -56,7 +56,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) static int qnx6_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -427,7 +427,7 @@ mmi_success: } s->s_op = &qnx6_sops; s->s_magic = QNX6_SUPER_MAGIC; - s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ + s->s_flags |= SB_RDONLY; /* Yup, read-only yet */ /* ease the later tree level calculations */ sbi = QNX6_SB(s); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 11a48affa882..b13fc024d2ee 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2106,7 +2106,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, journal_end(th); goto out_inserted_sd; } - } else if (inode->i_sb->s_flags & MS_POSIXACL) { + } else if (inode->i_sb->s_flags & SB_POSIXACL) { reiserfs_warning(inode->i_sb, "jdm-13090", "ACLs aren't enabled in the fs, " "but vfs thinks they are!"); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 69ff280bdfe8..70057359fbaf 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1960,7 +1960,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, /* * Cancel flushing of old commits. Note that neither of these works * will be requeued because superblock is being shutdown and doesn't - * have MS_ACTIVE set. + * have SB_ACTIVE set. */ reiserfs_cancel_old_flush(sb); /* wait for all commits to finish */ @@ -4302,7 +4302,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags) * Avoid queueing work when sb is being shut down. Transaction * will be flushed on journal shutdown. */ - if (sb->s_flags & MS_ACTIVE) + if (sb->s_flags & SB_ACTIVE) queue_delayed_work(REISERFS_SB(sb)->commit_wq, &journal->j_work, HZ / 10); } @@ -4393,7 +4393,7 @@ void reiserfs_abort_journal(struct super_block *sb, int errno) if (!journal->j_errno) journal->j_errno = errno; - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; set_bit(J_ABORTED, &journal->j_state); #ifdef CONFIG_REISERFS_CHECK diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 64f49cafbc5b..7e288d97adcb 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -390,7 +390,7 @@ void __reiserfs_error(struct super_block *sb, const char *id, return; reiserfs_info(sb, "Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; reiserfs_abort_journal(sb, -EIO); } @@ -409,7 +409,7 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id, error_buf); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; reiserfs_abort_journal(sb, errno); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5464ec517702..020c9cacbb2f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -121,7 +121,7 @@ void reiserfs_schedule_old_flush(struct super_block *s) * Avoid scheduling flush when sb is being shut down. It can race * with journal shutdown and free still queued delayed work. */ - if (sb_rdonly(s) || !(s->s_flags & MS_ACTIVE)) + if (sb_rdonly(s) || !(s->s_flags & SB_ACTIVE)) return; spin_lock(&sbi->old_work_lock); @@ -252,11 +252,11 @@ static int finish_unfinished(struct super_block *s) #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ - if (s->s_flags & MS_ACTIVE) { + if (s->s_flags & SB_ACTIVE) { ms_active_set = 0; } else { ms_active_set = 1; - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; } /* Turn on quotas so that they are updated correctly */ for (i = 0; i < REISERFS_MAXQUOTAS; i++) { @@ -411,7 +411,7 @@ static int finish_unfinished(struct super_block *s) reiserfs_write_lock(s); if (ms_active_set) /* Restore the flag back */ - s->s_flags &= ~MS_ACTIVE; + s->s_flags &= ~SB_ACTIVE; #endif pathrelse(&path); if (done) @@ -1521,7 +1521,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) goto out_err_unlock; } - if (*mount_flags & MS_RDONLY) { + if (*mount_flags & SB_RDONLY) { reiserfs_write_unlock(s); reiserfs_xattr_init(s, *mount_flags); /* remount read-only */ @@ -1567,7 +1567,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); /* now it is safe to call journal_begin */ - s->s_flags &= ~MS_RDONLY; + s->s_flags &= ~SB_RDONLY; err = journal_begin(&th, s, 10); if (err) goto out_err_unlock; @@ -1575,7 +1575,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) /* Mount a partition which is read-only, read-write */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); - s->s_flags &= ~MS_RDONLY; + s->s_flags &= ~SB_RDONLY; set_sb_umount_state(rs, REISERFS_ERROR_FS); if (!old_format_only(s)) set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); @@ -1590,7 +1590,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) goto out_err_unlock; reiserfs_write_unlock(s); - if (!(*mount_flags & MS_RDONLY)) { + if (!(*mount_flags & SB_RDONLY)) { dquot_resume(s, -1); reiserfs_write_lock(s); finish_unfinished(s); @@ -2055,7 +2055,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) { SWARN(silent, s, "clm-7000", "Detected readonly device, marking FS readonly"); - s->s_flags |= MS_RDONLY; + s->s_flags |= SB_RDONLY; } args.objectid = REISERFS_ROOT_OBJECTID; args.dirid = REISERFS_ROOT_PARENT_OBJECTID; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 46492fb37a4c..5dbf5324bdda 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -959,7 +959,7 @@ int reiserfs_lookup_privroot(struct super_block *s) /* * We need to take a copy of the mount flags since things like - * MS_RDONLY don't get set until *after* we're called. + * SB_RDONLY don't get set until *after* we're called. * mount_flags != mount_options */ int reiserfs_xattr_init(struct super_block *s, int mount_flags) @@ -971,7 +971,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) if (err) goto error; - if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) { + if (d_really_is_negative(privroot) && !(mount_flags & SB_RDONLY)) { inode_lock(d_inode(s->s_root)); err = create_privroot(REISERFS_SB(s)->priv_root); inode_unlock(d_inode(s->s_root)); @@ -999,11 +999,11 @@ error: clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt); } - /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ + /* The super_block SB_POSIXACL must mirror the (no)acl mount option. */ if (reiserfs_posixacl(s)) - s->s_flags |= MS_POSIXACL; + s->s_flags |= SB_POSIXACL; else - s->s_flags &= ~MS_POSIXACL; + s->s_flags &= ~SB_POSIXACL; return err; } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 0186fe6d39f3..8f06fd1f3d69 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -451,7 +451,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int romfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } @@ -502,7 +502,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = 0xFFFFFFFF; sb->s_magic = ROMFS_MAGIC; - sb->s_flags |= MS_RDONLY | MS_NOATIME; + sb->s_flags |= SB_RDONLY | SB_NOATIME; sb->s_op = &romfs_super_ops; #ifdef CONFIG_ROMFS_ON_MTD diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index cf01e15a7b16..8a73b97217c8 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -195,7 +195,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) (u64) le64_to_cpu(sblk->id_table_start)); sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; sb->s_op = &squashfs_super_ops; err = -ENOMEM; @@ -373,7 +373,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int squashfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } diff --git a/fs/statfs.c b/fs/statfs.c index b072a8bab71a..5b2a24f0f263 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -35,11 +35,11 @@ static int flags_by_mnt(int mnt_flags) static int flags_by_sb(int s_flags) { int flags = 0; - if (s_flags & MS_SYNCHRONOUS) + if (s_flags & SB_SYNCHRONOUS) flags |= ST_SYNCHRONOUS; - if (s_flags & MS_MANDLOCK) + if (s_flags & SB_MANDLOCK) flags |= ST_MANDLOCK; - if (s_flags & MS_RDONLY) + if (s_flags & SB_RDONLY) flags |= ST_RDONLY; return flags; } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 20b8f82e115b..fb49510c5dcf 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -30,7 +30,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, void *ns; bool new_sb; - if (!(flags & MS_KERNMOUNT)) { + if (!(flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return ERR_PTR(-EPERM); } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3c47b7d5d4cf..bec9f79adb25 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -63,7 +63,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) sync_filesystem(sb); if (sbi->s_forced_ro) - *flags |= MS_RDONLY; + *flags |= SB_RDONLY; return 0; } diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 0d56e486b392..89765ddfb738 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -333,7 +333,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) /* set up enough so that it can read an inode */ sb->s_op = &sysv_sops; if (sbi->s_forced_ro) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; if (sbi->s_truncate) sb->s_d_op = &sysv_dentry_operations; root_inode = sysv_iget(sb, SYSV_ROOT_INO); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a02aa59d1e24..dfe85069586e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1406,7 +1406,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, if (flags & S_MTIME) inode->i_mtime = *time; - if (!(inode->i_sb->s_flags & MS_LAZYTIME)) + if (!(inode->i_sb->s_flags & SB_LAZYTIME)) iflags |= I_DIRTY_SYNC; release = ui->dirty; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3be28900bf37..fe77e9625e84 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -84,7 +84,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) if (!c->ro_error) { c->ro_error = 1; c->no_chk_data_crc = 0; - c->vfs_sb->s_flags |= MS_RDONLY; + c->vfs_sb->s_flags |= SB_RDONLY; ubifs_warn(c, "switched to read-only mode, error %d", err); dump_stack(); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7503e7cdf870..0beb285b143d 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -968,7 +968,7 @@ static int parse_standard_option(const char *option) pr_notice("UBIFS: parse %s\n", option); if (!strcmp(option, "sync")) - return MS_SYNCHRONOUS; + return SB_SYNCHRONOUS; return 0; } @@ -1160,8 +1160,8 @@ static int mount_ubifs(struct ubifs_info *c) size_t sz; c->ro_mount = !!sb_rdonly(c->vfs_sb); - /* Suppress error messages while probing if MS_SILENT is set */ - c->probing = !!(c->vfs_sb->s_flags & MS_SILENT); + /* Suppress error messages while probing if SB_SILENT is set */ + c->probing = !!(c->vfs_sb->s_flags & SB_SILENT); err = init_constants_early(c); if (err) @@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return err; } - if (c->ro_mount && !(*flags & MS_RDONLY)) { + if (c->ro_mount && !(*flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/W due to prior errors"); return -EROFS; @@ -1864,7 +1864,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) err = ubifs_remount_rw(c); if (err) return err; - } else if (!c->ro_mount && (*flags & MS_RDONLY)) { + } else if (!c->ro_mount && (*flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/O due to prior errors"); return -EROFS; @@ -2117,7 +2117,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, */ ubi = open_ubi(name, UBI_READONLY); if (IS_ERR(ubi)) { - if (!(flags & MS_SILENT)) + if (!(flags & SB_SILENT)) pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d", current->pid, name, (int)PTR_ERR(ubi)); return ERR_CAST(ubi); @@ -2143,18 +2143,18 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, kfree(c); /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); - if (!!(flags & MS_RDONLY) != c1->ro_mount) { + if (!!(flags & SB_RDONLY) != c1->ro_mount) { err = -EBUSY; goto out_deact; } } else { - err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); if (err) goto out_deact; /* We do not support atime */ - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; #ifndef CONFIG_UBIFS_ATIME_SUPPORT - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; #else ubifs_msg(c, "full atime support is enabled."); #endif diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 63c7468147eb..5ee7af879cc4 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1201,7 +1201,7 @@ struct ubifs_debug_info; * @need_recovery: %1 if the file-system needs recovery * @replaying: %1 during journal replay * @mounting: %1 while mounting - * @probing: %1 while attempting to mount if MS_SILENT mount flag is set + * @probing: %1 while attempting to mount if SB_SILENT mount flag is set * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay @@ -1850,7 +1850,7 @@ __printf(2, 3) void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...); /* * A conditional variant of 'ubifs_err()' which doesn't output anything - * if probing (ie. MS_SILENT set). + * if probing (ie. SB_SILENT set). */ #define ubifs_errc(c, fmt, ...) \ do { \ diff --git a/fs/udf/super.c b/fs/udf/super.c index f80e0a0f24d3..f73239a9a97d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -650,7 +650,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) sync_filesystem(sb); if (lvidiu) { int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); - if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) + if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & SB_RDONLY)) return -EACCES; } @@ -673,10 +673,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) sbi->s_dmode = uopt.dmode; write_unlock(&sbi->s_cred_lock); - if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out_unlock; - if (*flags & MS_RDONLY) + if (*flags & SB_RDONLY) udf_close_lvid(sb); else udf_open_lvid(sb); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b5cd79065ef9..e727ee07dbe4 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -115,7 +115,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -205,7 +205,7 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { @@ -567,7 +567,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -688,7 +688,7 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 916b4a428933..e1ef0f0a1353 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -112,7 +112,7 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -146,14 +146,14 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bh); brelse(bh); } fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); @@ -284,7 +284,7 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); @@ -330,7 +330,7 @@ cg_found: ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, ts.tv_nsec); mark_buffer_dirty(bh); unlock_buffer(bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bh); brelse(bh); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 6440003f8ddc..4d497e9c6883 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -282,7 +282,7 @@ void ufs_error (struct super_block * sb, const char * function, usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); ufs_mark_sb_dirty(sb); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } va_start(args, fmt); vaf.fmt = fmt; @@ -320,7 +320,7 @@ void ufs_panic (struct super_block * sb, const char * function, va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; pr_crit("panic (device %s): %s: %pV\n", sb->s_id, function, &vaf); va_end(args); @@ -905,7 +905,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=old is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -921,7 +921,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=nextstep is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -937,7 +937,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=nextstep-cd is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -953,7 +953,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=openstep is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; @@ -968,7 +968,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=hp is supported read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } break; default: @@ -1125,21 +1125,21 @@ magic_found: break; case UFS_FSACTIVE: pr_err("%s(): fs is active\n", __func__); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; break; case UFS_FSBAD: pr_err("%s(): fs is bad\n", __func__); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; break; default: pr_err("%s(): can't grok fs_clean 0x%x\n", __func__, usb1->fs_clean); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; break; } } else { pr_err("%s(): fs needs fsck\n", __func__); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } /* @@ -1328,7 +1328,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) return -EINVAL; } - if ((bool)(*mount_flags & MS_RDONLY) == sb_rdonly(sb)) { + if ((bool)(*mount_flags & SB_RDONLY) == sb_rdonly(sb)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; mutex_unlock(&UFS_SB(sb)->s_lock); return 0; @@ -1337,7 +1337,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) /* * fs was mouted as rw, remounting ro */ - if (*mount_flags & MS_RDONLY) { + if (*mount_flags & SB_RDONLY) { ufs_put_super_internal(sb); usb1->fs_time = cpu_to_fs32(sb, get_seconds()); if ((flags & UFS_ST_MASK) == UFS_ST_SUN @@ -1346,7 +1346,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); ubh_mark_buffer_dirty (USPI_UBH(uspi)); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } else { /* * fs was mounted as ro, remounting rw @@ -1370,7 +1370,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) mutex_unlock(&UFS_SB(sb)->s_lock); return -EPERM; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 38d4227895ae..a503af96d780 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -781,17 +781,17 @@ xfs_log_mount_finish( * something to an unlinked inode, the irele won't cause * premature truncation and freeing of the inode, which results * in log recovery failure. We have to evict the unreferenced - * lru inodes after clearing MS_ACTIVE because we don't + * lru inodes after clearing SB_ACTIVE because we don't * otherwise clean up the lru if there's a subsequent failure in * xfs_mountfs, which leads to us leaking the inodes if nothing * else (e.g. quotacheck) references the inodes before the * mount failure occurs. */ - mp->m_super->s_flags |= MS_ACTIVE; + mp->m_super->s_flags |= SB_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); - mp->m_super->s_flags &= ~MS_ACTIVE; + mp->m_super->s_flags &= ~SB_ACTIVE; evict_inodes(mp->m_super); /* diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f663022353c0..5122d3021117 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -212,9 +212,9 @@ xfs_parseargs( */ if (sb_rdonly(sb)) mp->m_flags |= XFS_MOUNT_RDONLY; - if (sb->s_flags & MS_DIRSYNC) + if (sb->s_flags & SB_DIRSYNC) mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) mp->m_flags |= XFS_MOUNT_WSYNC; /* @@ -1312,7 +1312,7 @@ xfs_fs_remount( } /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) { if (mp->m_flags & XFS_MOUNT_NORECOVERY) { xfs_warn(mp, "ro->rw transition prohibited on norecovery mount"); @@ -1368,7 +1368,7 @@ xfs_fs_remount( } /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 5f2f32408011..fcc5dfc70aa0 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -30,7 +30,7 @@ extern void xfs_qm_exit(void); #ifdef CONFIG_XFS_POSIX_ACL # define XFS_ACL_STRING "ACLs, " -# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) +# define set_posix_acl_flag(sb) ((sb)->s_flags |= SB_POSIXACL) #else # define XFS_ACL_STRING # define set_posix_acl_flag(sb) do { } while (0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2995a271ec46..bbd92da0946e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1872,7 +1872,7 @@ struct super_operations { */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) -static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & MS_RDONLY; } +static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) diff --git a/include/uapi/linux/bfs_fs.h b/include/uapi/linux/bfs_fs.h index 73445ef07dda..940b04772af8 100644 --- a/include/uapi/linux/bfs_fs.h +++ b/include/uapi/linux/bfs_fs.h @@ -76,7 +76,7 @@ struct bfs_super_block { #define BFS_FILEBLOCKS(ip) \ ((ip)->i_sblock == 0 ? 0 : (le32_to_cpu((ip)->i_eblock) + 1) - le32_to_cpu((ip)->i_sblock)) #define BFS_UNCLEAN(bfs_sb, sb) \ - ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) + ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & SB_RDONLY)) #endif /* _LINUX_BFS_FS_H */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index d24025626310..9649ecd8a73a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -331,7 +331,7 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, void *data) { struct ipc_namespace *ns; - if (flags & MS_KERNMOUNT) { + if (flags & SB_KERNMOUNT) { ns = data; data = NULL; } else { diff --git a/mm/shmem.c b/mm/shmem.c index 4aa9307feab0..7fbe67be86fa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3776,7 +3776,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) * tmpfs instance, limiting inodes to one per page of lowmem; * but the internal instance is left unlimited. */ - if (!(sb->s_flags & MS_KERNMOUNT)) { + if (!(sb->s_flags & SB_KERNMOUNT)) { sbinfo->max_blocks = shmem_default_max_blocks(); sbinfo->max_inodes = shmem_default_max_inodes(); if (shmem_parse_options(data, sbinfo, false)) { @@ -3784,12 +3784,12 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) goto failed; } } else { - sb->s_flags |= MS_NOUSER; + sb->s_flags |= SB_NOUSER; } sb->s_export_op = &shmem_export_ops; - sb->s_flags |= MS_NOSEC; + sb->s_flags |= SB_NOSEC; #else - sb->s_flags |= MS_NOUSER; + sb->s_flags |= SB_NOUSER; #endif spin_lock_init(&sbinfo->stat_lock); @@ -3809,7 +3809,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = shmem_xattr_handlers; #endif #ifdef CONFIG_TMPFS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif uuid_gen(&sb->s_uuid); diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 8542e9a55e1b..d4fa04d91439 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2451,7 +2451,7 @@ static int __init aa_create_aafs(void) aafs_mnt = kern_mount(&aafs_ops); if (IS_ERR(aafs_mnt)) panic("can't set apparmorfs up\n"); - aafs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; + aafs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; /* Populate fs tree. */ error = entry_create_dir(&aa_sfs_entry, NULL); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f546707a2bbb..6505e1ad9e23 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -86,7 +86,7 @@ static inline unsigned int aa_dfa_null_transition(struct aa_dfa *dfa, static inline bool path_mediated_fs(struct dentry *dentry) { - return !(dentry->d_sb->s_flags & MS_NOUSER); + return !(dentry->d_sb->s_flags & SB_NOUSER); } -- cgit v1.2.3 From a39e17b2d842938e19997d2fdc0443fdd4cd8d10 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Nov 2017 12:10:23 -0800 Subject: bpf: offload: add a license header I forgot to add a license on kernel/bpf/offload.c. Luckily I'm still the only author so make it explicitly GPLv2. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: Daniel Borkmann --- kernel/bpf/offload.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 68ec884440b7..8455b89d1bbf 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -1,3 +1,18 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree. + * + * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE + * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME + * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + */ + #include #include #include -- cgit v1.2.3 From 15ca08d3299682dc49bad73251677b2c5017ef08 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:10 -0400 Subject: nfsd: Fix stateid races between OPEN and CLOSE Open file stateids can linger on the nfs4_file list of stateids even after they have been closed. In order to avoid reusing such a stateid, and confusing the client, we need to recheck the nfs4_stid's type after taking the mutex. Otherwise, we risk reusing an old stateid that was already closed, which will confuse clients that expect new stateids to conform to RFC7530 Sections 9.1.4.2 and 16.2.5 or RFC5661 Sections 8.2.2 and 18.2.4. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b82817767b9d..ee8fde2dfa92 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3562,7 +3562,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - if (local->st_stateowner == &oo->oo_owner) { + if (local->st_stateowner != &oo->oo_owner) + continue; + if (local->st_stid.sc_type == NFS4_OPEN_STID) { ret = local; refcount_inc(&ret->st_stid.sc_count); break; @@ -3571,6 +3573,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) return ret; } +static __be32 +nfsd4_verify_open_stid(struct nfs4_stid *s) +{ + __be32 ret = nfs_ok; + + switch (s->sc_type) { + default: + break; + case NFS4_CLOSED_STID: + case NFS4_CLOSED_DELEG_STID: + ret = nfserr_bad_stateid; + break; + case NFS4_REVOKED_DELEG_STID: + ret = nfserr_deleg_revoked; + } + return ret; +} + +/* Lock the stateid st_mutex, and deal with races with CLOSE */ +static __be32 +nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) +{ + __be32 ret; + + mutex_lock(&stp->st_mutex); + ret = nfsd4_verify_open_stid(&stp->st_stid); + if (ret != nfs_ok) + mutex_unlock(&stp->st_mutex); + return ret; +} + +static struct nfs4_ol_stateid * +nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *stp; + for (;;) { + spin_lock(&fp->fi_lock); + stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); + if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok) + break; + nfs4_put_stid(&stp->st_stid); + } + return stp; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3615,6 +3663,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) mutex_init(&stp->st_mutex); mutex_lock(&stp->st_mutex); +retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ -3639,7 +3688,11 @@ out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { - mutex_lock(&retstp->st_mutex); + /* Handle races with CLOSE */ + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); stp = retstp; @@ -4460,9 +4513,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; - spin_lock(&fp->fi_lock); - stp = nfsd4_find_existing_open(fp, open); - spin_unlock(&fp->fi_lock); + stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4476,7 +4527,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); @@ -5367,7 +5417,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) bool unhashed; LIST_HEAD(reaplist); - s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -5407,10 +5456,12 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; + + stp->st_stid.sc_type = NFS4_CLOSED_STID; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); + mutex_unlock(&stp->st_mutex); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); -- cgit v1.2.3 From d8a1a000555ecd1b824ac1ed6df8fe364dfbbbb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:11 -0400 Subject: nfsd: Fix another OPEN stateid race If nfsd4_process_open2() is initialising a new stateid, and yet the call to nfs4_get_vfs_file() fails for some reason, then we must declare the stateid closed, and unhash it before dropping the mutex. Right now, we unhash the stateid after dropping the mutex, and without changing the stateid type, meaning that another OPEN could theoretically look it up and attempt to use it. Reported-by: Andrew W Elble Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ee8fde2dfa92..457f0e7ece74 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4502,6 +4502,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; + bool new_stp = false; /* * Lookup file; if found, lookup stateid and check open request, @@ -4521,11 +4522,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } + if (!stp) { + stp = init_open_stateid(fp, open); + if (!open->op_stp) + new_stp = true; + } + /* * OPEN the file, or upgrade an existing OPEN. * If truncate fails, the OPEN fails. + * + * stp is already locked. */ - if (stp) { + if (!new_stp) { /* Stateid was found, this is an OPEN upgrade */ status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { @@ -4533,22 +4542,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } } else { - /* stp is returned locked. */ - stp = init_open_stateid(fp, open); - /* See if we lost the race to some other thread */ - if (stp->st_access_bmap != 0) { - status = nfs4_upgrade_open(rqstp, fp, current_fh, - stp, open); - if (status) { - mutex_unlock(&stp->st_mutex); - goto out; - } - goto upgrade_out; - } status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - mutex_unlock(&stp->st_mutex); + stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); + mutex_unlock(&stp->st_mutex); goto out; } @@ -4557,7 +4555,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } -upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); -- cgit v1.2.3 From fb500a7cfee7f2f447d2bbf30cb59629feab6ac1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:12 -0400 Subject: nfsd: CLOSE SHOULD return the invalid special stateid for NFSv4.x (x>0) Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 457f0e7ece74..afc04b9784a0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -63,6 +63,9 @@ static const stateid_t zero_stateid = { static const stateid_t currentstateid = { .si_generation = 1, }; +static const stateid_t close_stateid = { + .si_generation = 0xffffffffU, +}; static u64 current_sessionid = 1; @@ -5461,6 +5464,11 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); + /* See RFC5661 sectionm 18.2.4 */ + if (stp->st_stid.sc_client->cl_minorversion) + memcpy(&close->cl_stateid, &close_stateid, + sizeof(close->cl_stateid)); + /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: -- cgit v1.2.3 From 659aefb68eca28ba9aa482a9fc64de107332e256 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:13 -0400 Subject: nfsd: Ensure we don't recognise lock stateids after freeing them In order to deal with lookup races, nfsd4_free_lock_stateid() needs to be able to signal to other stateful functions that the lock stateid is no longer valid. Right now, nfsd_lock() will check whether or not an existing stateid is still hashed, but only in the "new lock" path. To ensure the stateid invalidation is also recognised by the "existing lock" path, and also by a second call to nfsd4_free_lock_stateid() itself, we can change the type to NFS4_CLOSED_STID under the stp->st_mutex. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index afc04b9784a0..6542b57ecc86 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5149,7 +5149,9 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) struct nfs4_ol_stateid *stp = openlockstateid(s); __be32 ret; - mutex_lock(&stp->st_mutex); + ret = nfsd4_lock_ol_stateid(stp); + if (ret) + goto out_put_stid; ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) @@ -5160,11 +5162,13 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) lockowner(stp->st_stateowner))) goto out; + stp->st_stid.sc_type = NFS4_CLOSED_STID; release_lock_stateid(stp); ret = nfs_ok; out: mutex_unlock(&stp->st_mutex); +out_put_stid: nfs4_put_stid(s); return ret; } @@ -5733,6 +5737,8 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) lockdep_assert_held(&clp->cl_lock); list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { + if (lst->st_stid.sc_type != NFS4_LOCK_STID) + continue; if (lst->st_stid.sc_file == fp) { refcount_inc(&lst->st_stid.sc_count); return lst; @@ -5807,7 +5813,6 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_lockowner *lo; struct nfs4_ol_stateid *lst; unsigned int strhashval; - bool hashed; lo = find_lockowner_str(cl, &lock->lk_new_owner); if (!lo) { @@ -5830,15 +5835,7 @@ retry: goto out; } - mutex_lock(&lst->st_mutex); - - /* See if it's still hashed to avoid race with FREE_STATEID */ - spin_lock(&cl->cl_lock); - hashed = !list_empty(&lst->st_perfile); - spin_unlock(&cl->cl_lock); - - if (!hashed) { - mutex_unlock(&lst->st_mutex); + if (nfsd4_lock_ol_stateid(lst) != nfs_ok) { nfs4_put_stid(&lst->st_stid); goto retry; } -- cgit v1.2.3 From fd1fd685b30867122aafbe2998ce4065c8c87e8f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:14 -0400 Subject: nfsd4: move find_lock_stateid Trivial cleanup to simplify following patch. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6542b57ecc86..49a9741fa132 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5703,6 +5703,25 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, return ret; } +static struct nfs4_ol_stateid * +find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *lst; + struct nfs4_client *clp = lo->lo_owner.so_client; + + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { + if (lst->st_stid.sc_type != NFS4_LOCK_STID) + continue; + if (lst->st_stid.sc_file == fp) { + refcount_inc(&lst->st_stid.sc_count); + return lst; + } + } + return NULL; +} + static void init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_file *fp, struct inode *inode, @@ -5728,25 +5747,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, spin_unlock(&fp->fi_lock); } -static struct nfs4_ol_stateid * -find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) -{ - struct nfs4_ol_stateid *lst; - struct nfs4_client *clp = lo->lo_owner.so_client; - - lockdep_assert_held(&clp->cl_lock); - - list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { - if (lst->st_stid.sc_type != NFS4_LOCK_STID) - continue; - if (lst->st_stid.sc_file == fp) { - refcount_inc(&lst->st_stid.sc_count); - return lst; - } - } - return NULL; -} - static struct nfs4_ol_stateid * find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, struct inode *inode, struct nfs4_ol_stateid *ost, -- cgit v1.2.3 From beeca19cf1249a917ed737729dc92337c7633bea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:14 -0400 Subject: nfsd: Fix race in lock stateid creation If we're looking up a new lock state, and the creation fails, then we want to unhash it, just like we do for OPEN. However in order to do so, we need to that no other LOCK requests can grab the mutex until we have unhashed it (and marked it as closed). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 49a9741fa132..6d5993caf9bc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5722,14 +5722,22 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) return NULL; } -static void +static struct nfs4_ol_stateid * init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_file *fp, struct inode *inode, struct nfs4_ol_stateid *open_stp) { struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfs4_ol_stateid *retstp; - lockdep_assert_held(&clp->cl_lock); + mutex_init(&stp->st_mutex); + mutex_lock(&stp->st_mutex); +retry: + spin_lock(&clp->cl_lock); + spin_lock(&fp->fi_lock); + retstp = find_lock_stateid(lo, fp); + if (retstp) + goto out_unlock; refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; @@ -5739,12 +5747,22 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - mutex_init(&stp->st_mutex); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); - spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); +out_unlock: spin_unlock(&fp->fi_lock); + spin_unlock(&clp->cl_lock); + if (retstp) { + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } + /* To keep mutex tracking happy */ + mutex_unlock(&stp->st_mutex); + stp = retstp; + } + return stp; } static struct nfs4_ol_stateid * @@ -5757,26 +5775,25 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *clp = oo->oo_owner.so_client; + *new = false; spin_lock(&clp->cl_lock); lst = find_lock_stateid(lo, fi); - if (lst == NULL) { - spin_unlock(&clp->cl_lock); - ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); - if (ns == NULL) - return NULL; - - spin_lock(&clp->cl_lock); - lst = find_lock_stateid(lo, fi); - if (likely(!lst)) { - lst = openlockstateid(ns); - init_lock_stateid(lst, lo, fi, inode, ost); - ns = NULL; - *new = true; - } - } spin_unlock(&clp->cl_lock); - if (ns) + if (lst != NULL) { + if (nfsd4_lock_ol_stateid(lst) == nfs_ok) + goto out; + nfs4_put_stid(&lst->st_stid); + } + ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); + if (ns == NULL) + return NULL; + + lst = init_lock_stateid(openlockstateid(ns), lo, fi, inode, ost); + if (lst == openlockstateid(ns)) + *new = true; + else nfs4_put_stid(ns); +out: return lst; } @@ -5828,17 +5845,12 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, goto out; } -retry: lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (lst == NULL) { status = nfserr_jukebox; goto out; } - if (nfsd4_lock_ol_stateid(lst) != nfs_ok) { - nfs4_put_stid(&lst->st_stid); - goto retry; - } status = nfs_ok; *plst = lst; out: @@ -6044,14 +6056,16 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; - mutex_unlock(&lock_stp->st_mutex); - /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. */ - if (status && new) + if (status && new) { + lock_stp->st_stid.sc_type = NFS4_CLOSED_STID; release_lock_stateid(lock_stp); + } + + mutex_unlock(&lock_stp->st_mutex); nfs4_put_stid(&lock_stp->st_stid); } -- cgit v1.2.3 From 9271d7e509c1bfc0b9a418caec29ec8d1ac38270 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:15 -0400 Subject: nfsd: Ensure we check stateid validity in the seqid operation checks After taking the stateid st_mutex, we want to know that the stateid still represents valid state before performing any non-idempotent actions. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6d5993caf9bc..da07c24e9372 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5234,15 +5234,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID - || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) - /* - * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step, and - * revoked delegations are kept only for free_stateid. - */ - return nfserr_bad_stateid; - mutex_lock(&stp->st_mutex); + status = nfsd4_lock_ol_stateid(stp); + if (status != nfs_ok) + return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); -- cgit v1.2.3 From 03da3169c67f67729d8b52cee4e5689b0a3f94c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:16 -0400 Subject: nfsd: Fix races with check_stateid_generation() The various functions that call check_stateid_generation() in order to compare a client-supplied stateid with the nfs4_stid state, usually need to atomically check for closed state. Those that perform the check after locking the st_mutex using nfsd4_lock_ol_stateid() should now be OK, but we do want to fix up the others. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index da07c24e9372..ecb803d7c966 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4906,6 +4906,18 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s return nfserr_old_stateid; } +static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_stid *s, bool has_session) +{ + __be32 ret; + + spin_lock(&s->sc_lock); + ret = nfsd4_verify_open_stid(s); + if (ret == nfs_ok) + ret = check_stateid_generation(in, &s->sc_stateid, has_session); + spin_unlock(&s->sc_lock); + return ret; +} + static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols) { if (ols->st_stateowner->so_is_open_owner && @@ -4934,7 +4946,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) s = find_stateid_locked(cl, stateid); if (!s) goto out_unlock; - status = check_stateid_generation(stateid, &s->sc_stateid, 1); + status = nfsd4_stid_check_stateid_generation(stateid, s, 1); if (status) goto out_unlock; switch (s->sc_type) { @@ -5095,7 +5107,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, &s, nn); if (status) return status; - status = check_stateid_generation(stateid, &s->sc_stateid, + status = nfsd4_stid_check_stateid_generation(stateid, s, nfsd4_has_session(cstate)); if (status) goto out; @@ -5188,6 +5200,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, s = find_stateid_locked(cl, stateid); if (!s) goto out_unlock; + spin_lock(&s->sc_lock); switch (s->sc_type) { case NFS4_DELEG_STID: ret = nfserr_locks_held; @@ -5199,11 +5212,13 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ret = nfserr_locks_held; break; case NFS4_LOCK_STID: + spin_unlock(&s->sc_lock); refcount_inc(&s->sc_count); spin_unlock(&cl->cl_lock); ret = nfsd4_free_lock_stateid(stateid, s); goto out; case NFS4_REVOKED_DELEG_STID: + spin_unlock(&s->sc_lock); dp = delegstateid(s); list_del_init(&dp->dl_recall_lru); spin_unlock(&cl->cl_lock); @@ -5212,6 +5227,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; /* Default falls through and returns nfserr_bad_stateid */ } + spin_unlock(&s->sc_lock); out_unlock: spin_unlock(&cl->cl_lock); out: @@ -5491,7 +5507,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; dp = delegstateid(s); - status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); + status = nfsd4_stid_check_stateid_generation(stateid, &dp->dl_stid, nfsd4_has_session(cstate)); if (status) goto put_stateid; -- cgit v1.2.3 From ba589528d6a251721a245fad07c0e3090fc9d6f5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 8 Nov 2017 08:55:22 +0300 Subject: nfsd: remove net pointer from debug messages Publishing of net pointer is not safe, replace it in debug meesages by net->ns.inum [ 119.989161] nfsd: initializing export module (net: f00001e7). [ 171.767188] NFSD: starting 90-second grace period (net f00001e7) [ 322.185240] nfsd: shutting down export module (net: f00001e7). [ 322.186062] nfsd: export shutdown complete (net: f00001e7). Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 46b48dbbdd32..f9e7dd57cc12 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1230,7 +1230,7 @@ nfsd_export_init(struct net *net) int rv; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("nfsd: initializing export module (net: %p).\n", net); + dprintk("nfsd: initializing export module (net: %x).\n", net->ns.inum); nn->svc_export_cache = cache_create_net(&svc_export_cache_template, net); if (IS_ERR(nn->svc_export_cache)) @@ -1278,7 +1278,7 @@ nfsd_export_shutdown(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("nfsd: shutting down export module (net: %p).\n", net); + dprintk("nfsd: shutting down export module (net: %x).\n", net->ns.inum); cache_unregister_net(nn->svc_expkey_cache, net); cache_unregister_net(nn->svc_export_cache, net); @@ -1286,5 +1286,5 @@ nfsd_export_shutdown(struct net *net) cache_destroy_net(nn->svc_export_cache, net); svcauth_unix_purge(net); - dprintk("nfsd: export shutdown complete (net: %p).\n", net); + dprintk("nfsd: export shutdown complete (net: %x).\n", net->ns.inum); } -- cgit v1.2.3 From e919b0765287f36f7e9c84aed785eddfbbd98dfa Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 8 Nov 2017 08:55:55 +0300 Subject: lockd: remove net pointer from messages Publishing of net pointer is not safe, use net->ns.inum as net ID in debug messages [ 171.757678] lockd_up_net: per-net data created; net=f00001e7 [ 171.767188] NFSD: starting 90-second grace period (net f00001e7) [ 300.653313] lockd: nuking all hosts in net f00001e7... [ 300.653641] lockd: host garbage collection for net f00001e7 [ 300.653968] lockd: nlmsvc_mark_resources for net f00001e7 [ 300.711483] lockd_down_net: per-net data destroyed; net=f00001e7 [ 300.711847] lockd: nuking all hosts in net 0... [ 300.711847] lockd: host garbage collection for net 0 [ 300.711848] lockd: nlmsvc_mark_resources for net 0 Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 21 +++++++++++++-------- fs/lockd/mon.c | 3 ++- fs/lockd/svc.c | 9 +++++---- fs/lockd/svcsubs.c | 2 +- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0d4e590e0549..f90f6d5082ef 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -578,8 +578,10 @@ static void nlm_complain_hosts(struct net *net) if (ln->nrhosts == 0) return; - printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net); - dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net); + pr_warn("lockd: couldn't shutdown host module for net %x!\n", + net->ns.inum); + dprintk("lockd: %lu hosts left in net %x:\n", ln->nrhosts, + net->ns.inum); } else { if (nrhosts == 0) return; @@ -590,9 +592,9 @@ static void nlm_complain_hosts(struct net *net) for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; - dprintk(" %s (cnt %d use %d exp %ld net %p)\n", + dprintk(" %s (cnt %d use %d exp %ld net %x)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires, host->net); + host->h_inuse, host->h_expires, host->net->ns.inum); } } @@ -605,7 +607,8 @@ nlm_shutdown_hosts_net(struct net *net) mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ - dprintk("lockd: nuking all hosts in net %p...\n", net); + dprintk("lockd: nuking all hosts in net %x...\n", + net ? net->ns.inum : 0); for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; @@ -646,7 +649,8 @@ nlm_gc_hosts(struct net *net) struct hlist_node *next; struct nlm_host *host; - dprintk("lockd: host garbage collection for net %p\n", net); + dprintk("lockd: host garbage collection for net %x\n", + net ? net->ns.inum : 0); for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; @@ -662,9 +666,10 @@ nlm_gc_hosts(struct net *net) if (atomic_read(&host->h_count) || host->h_inuse || time_before(jiffies, host->h_expires)) { dprintk("nlm_gc_hosts skipping %s " - "(cnt %d use %d exp %ld net %p)\n", + "(cnt %d use %d exp %ld net %x)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires, host->net); + host->h_inuse, host->h_expires, + host->net->ns.inum); continue; } nlm_destroy_host_locked(host); diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 9fbbd11f9ecb..96cfb2967ac7 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -110,7 +110,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, clnt = nsm_create(host->net, host->nodename); if (IS_ERR(clnt)) { dprintk("lockd: failed to create NSM upcall transport, " - "status=%ld, net=%p\n", PTR_ERR(clnt), host->net); + "status=%ld, net=%x\n", PTR_ERR(clnt), + host->net->ns.inum); return PTR_ERR(clnt); } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a8e3777c94dc..e28796bd7375 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -259,7 +259,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) if (error < 0) goto err_bind; set_grace_period(net); - dprintk("lockd_up_net: per-net data created; net=%p\n", net); + dprintk("%s: per-net data created; net=%x\n", __func__, net->ns.inum); return 0; err_bind: @@ -275,11 +275,12 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); svc_shutdown_net(serv, net); - dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); + dprintk("%s: per-net data destroyed; net=%x\n", + __func__, net->ns.inum); } } else { - printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", - nlmsvc_task, net); + pr_err("%s: no users! task=%p, net=%x\n", + __func__, nlmsvc_task, net->ns.inum); BUG(); } } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index a563ddbc19e6..4ec3d6e03e76 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -370,7 +370,7 @@ nlmsvc_mark_resources(struct net *net) { struct nlm_host hint; - dprintk("lockd: nlmsvc_mark_resources for net %p\n", net); + dprintk("lockd: %s for net %x\n", __func__, net ? net->ns.inum : 0); hint.net = net; nlm_traverse_files(&hint, nlmsvc_mark_host, NULL); } -- cgit v1.2.3 From 4f34bd0540d290d1fe1bc699550025623b2761ef Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Wed, 8 Nov 2017 17:29:51 -0500 Subject: nfsd: fix locking validator warning on nfs4_ol_stateid->st_mutex class The use of the st_mutex has been confusing the validator. Use the proper nested notation so as to not produce warnings. Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ecb803d7c966..98342d262771 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -86,6 +86,11 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid); */ static DEFINE_SPINLOCK(state_lock); +enum nfsd4_st_mutex_lock_subclass { + OPEN_STATEID_MUTEX = 0, + LOCK_STATEID_MUTEX = 1, +}; + /* * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for * the refcount on the open stateid to drop. @@ -3600,7 +3605,7 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) { __be32 ret; - mutex_lock(&stp->st_mutex); + mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); ret = nfsd4_verify_open_stid(&stp->st_stid); if (ret != nfs_ok) mutex_unlock(&stp->st_mutex); @@ -3664,7 +3669,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) stp = open->op_stp; /* We are moving these outside of the spinlocks to avoid the warnings */ mutex_init(&stp->st_mutex); - mutex_lock(&stp->st_mutex); + mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&oo->oo_owner.so_client->cl_lock); @@ -5741,7 +5746,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_ol_stateid *retstp; mutex_init(&stp->st_mutex); - mutex_lock(&stp->st_mutex); + mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&clp->cl_lock); spin_lock(&fp->fi_lock); -- cgit v1.2.3 From b872285751c1af010e12d02bce7069e2061a58ca Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 6 Nov 2017 16:22:48 +0300 Subject: grace: replace BUG_ON by WARN_ONCE in exit_net hook Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/nfs_common/grace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 897b299db55e..bd3e2d328e64 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -104,7 +104,9 @@ grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); - BUG_ON(!list_empty(grace_list)); + WARN_ONCE(!list_empty(grace_list), + "net %x %s: grace_list is not empty\n", + net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { -- cgit v1.2.3 From a3152f1440c762128a626d90718ba6535c81c1b4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 6 Nov 2017 16:23:24 +0300 Subject: lockd: added cleanup checks in exit_net hook Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index e28796bd7375..cb659ef789c0 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -677,6 +677,17 @@ static int lockd_init_net(struct net *net) static void lockd_exit_net(struct net *net) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + WARN_ONCE(!list_empty(&ln->lockd_manager.list), + "net %x %s: lockd_manager.list is not empty\n", + net->ns.inum, __func__); + WARN_ONCE(!list_empty(&ln->nsm_handles), + "net %x %s: nsm_handles list is not empty\n", + net->ns.inum, __func__); + WARN_ONCE(delayed_work_pending(&ln->grace_period_end), + "net %x %s: grace_period_end was not cancelled\n", + net->ns.inum, __func__); } static struct pernet_operations lockd_net_ops = { -- cgit v1.2.3 From 3a2b19d1ee5633f76ae8a88da7bc039a5d1732aa Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Nov 2017 13:03:42 +0300 Subject: lockd: lost rollback of set_grace_period() in lockd_down_net() Commit efda760fe95ea ("lockd: fix lockd shutdown race") is incorrect, it removes lockd_manager and disarm grace_period_end for init_net only. If nfsd was started from another net namespace lockd_up_net() calls set_grace_period() that adds lockd_manager into per-netns list and queues grace_period_end delayed work. These action should be reverted in lockd_down_net(). Otherwise it can lead to double list_add on after restart nfsd in netns, and to use-after-free if non-disarmed delayed work will be executed after netns destroy. Fixes: efda760fe95e ("lockd: fix lockd shutdown race") Cc: stable@vger.kernel.org Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index cb659ef789c0..4acf0ebd9802 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -274,6 +274,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); + cancel_delayed_work_sync(&ln->grace_period_end); + locks_end_grace(&ln->lockd_manager); svc_shutdown_net(serv, net); dprintk("%s: per-net data destroyed; net=%x\n", __func__, net->ns.inum); -- cgit v1.2.3 From 64ebe12494fd5d193f014ce38e1fd83cc57883c8 Mon Sep 17 00:00:00 2001 From: Naofumi Honda Date: Thu, 9 Nov 2017 10:57:16 -0500 Subject: nfsd: fix panic in posix_unblock_lock called from nfs4_laundromat From kernel 4.9, my two nfsv4 servers sometimes suffer from "panic: unable to handle kernel page request" in posix_unblock_lock() called from nfs4_laundromat(). These panics diseappear if we revert the commit "nfsd: add a LRU list for blocked locks". The cause appears to be a typo in nfs4_laundromat(), which is also present in nfs4_state_shutdown_net(). Cc: stable@vger.kernel.org Fixes: 7919d0a27f1e "nfsd: add a LRU list for blocked locks" Cc: jlayton@redhat.com Reveiwed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 98342d262771..d0992d59f4e1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4790,7 +4790,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); @@ -7236,7 +7236,7 @@ nfs4_state_shutdown_net(struct net *net) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); -- cgit v1.2.3 From ae254dac721d44c0bfebe2795df87459e2e88219 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Thu, 9 Nov 2017 13:41:10 -0500 Subject: nfsd: check for use of the closed special stateid Prevent the use of the closed (invalid) special stateid by clients. Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d0992d59f4e1..73451436c1c2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -72,6 +72,7 @@ static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) +#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t))) /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); @@ -4936,7 +4937,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return status; /* Client debugging aid. */ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { @@ -4994,7 +4996,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, else if (typemask & NFS4_DELEG_STID) typemask |= NFS4_REVOKED_DELEG_STID; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { -- cgit v1.2.3 From d34971a65b72619508f49cd237283e92f1c329d5 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 17 Oct 2017 18:14:23 +0200 Subject: sunrpc: make the function arg as const Make the struct cache_detail *tmpl argument of the function cache_create_net as const as it is only getting passed to kmemup having the argument as const void *. Add const to the prototype too. Signed-off-by: Bhumika Goyal Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 +- net/sunrpc/cache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 270bad0e1bed..40d2822f0e2f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -213,7 +213,7 @@ extern void __init cache_initialize(void); extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); -extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); +extern struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net); extern void cache_destroy_net(struct cache_detail *cd, struct net *net); extern void sunrpc_init_cache_detail(struct cache_detail *cd); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 79d55d949d9a..e68943895be4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_unregister_net); -struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) +struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; int i; -- cgit v1.2.3 From ae2e408ec2e861b0e7dceea1808e4305a9381c2f Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 17 Oct 2017 18:14:25 +0200 Subject: NFSD: make cache_detail structures const Make these const as they are only getting passed to the function cache_create_net having the argument as const. Signed-off-by: Bhumika Goyal Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 4 ++-- fs/nfsd/nfs4idmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index f9e7dd57cc12..8ceb25a10ea0 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -232,7 +232,7 @@ static struct cache_head *expkey_alloc(void) return NULL; } -static struct cache_detail svc_expkey_cache_template = { +static const struct cache_detail svc_expkey_cache_template = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", @@ -748,7 +748,7 @@ static struct cache_head *svc_export_alloc(void) return NULL; } -static struct cache_detail svc_export_cache_template = { +static const struct cache_detail svc_export_cache_template = { .owner = THIS_MODULE, .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 6b9b6cca469f..a5bb76593ce7 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -178,7 +178,7 @@ static struct ent *idtoname_lookup(struct cache_detail *, struct ent *); static struct ent *idtoname_update(struct cache_detail *, struct ent *, struct ent *); -static struct cache_detail idtoname_cache_template = { +static const struct cache_detail idtoname_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", @@ -341,7 +341,7 @@ static struct ent *nametoid_update(struct cache_detail *, struct ent *, struct ent *); static int nametoid_parse(struct cache_detail *, char *, int); -static struct cache_detail nametoid_cache_template = { +static const struct cache_detail nametoid_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", -- cgit v1.2.3 From ee24eac3ebb781c12a654985e33ecaa07f4d0f95 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 17 Oct 2017 18:14:26 +0200 Subject: SUNRPC: make cache_detail structures const Make these const as they are only getting passed to the function cache_create_net having the argument as const. Signed-off-by: Bhumika Goyal Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/svcauth_unix.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73165e9ca5bf..5dd4e6c9fef2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -264,7 +264,7 @@ out: return status; } -static struct cache_detail rsi_cache_template = { +static const struct cache_detail rsi_cache_template = { .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", @@ -524,7 +524,7 @@ out: return status; } -static struct cache_detail rsc_cache_template = { +static const struct cache_detail rsc_cache_template = { .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, .name = "auth.rpcsec.context", diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index f81eaa8e0888..740b67d5a733 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -569,7 +569,7 @@ static int unix_gid_show(struct seq_file *m, return 0; } -static struct cache_detail unix_gid_cache_template = { +static const struct cache_detail unix_gid_cache_template = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, .name = "auth.unix.gid", @@ -862,7 +862,7 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; -static struct cache_detail ip_map_cache_template = { +static const struct cache_detail ip_map_cache_template = { .owner = THIS_MODULE, .hash_size = IP_HASHMAX, .name = "auth.unix.ip", -- cgit v1.2.3 From 6b18dd1c03e07262ea0866084856b2a3c5ba8d09 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Nov 2017 10:19:26 +0300 Subject: race of lockd inetaddr notifiers vs nlmsvc_rqst change lockd_inet[6]addr_event use nlmsvc_rqst without taken nlmsvc_mutex, nlmsvc_rqst can be changed during execution of notifiers and crash the host. Patch enables access to nlmsvc_rqst only when it was correctly initialized and delays its cleanup until notifiers are no longer in use. Note that nlmsvc_rqst can be temporally set to ERR_PTR, so the "if (nlmsvc_rqst)" check in notifiers is insufficient on its own. Signed-off-by: Vasily Averin Tested-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 4acf0ebd9802..9c36d614bf89 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,6 +57,9 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; +atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); + unsigned int lockd_net_id; /* @@ -293,7 +296,8 @@ static int lockd_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; if (nlmsvc_rqst) { @@ -304,6 +308,8 @@ static int lockd_inetaddr_event(struct notifier_block *this, svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -320,7 +326,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; if (nlmsvc_rqst) { @@ -332,6 +339,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin6); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -348,10 +357,12 @@ static void lockd_unregister_notifiers(void) #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif + wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0); } static void lockd_svc_exit_thread(void) { + atomic_dec(&nlm_ntf_refcnt); lockd_unregister_notifiers(); svc_exit_thread(nlmsvc_rqst); } @@ -376,6 +387,7 @@ static int lockd_start_svc(struct svc_serv *serv) goto out_rqst; } + atomic_inc(&nlm_ntf_refcnt); svc_sock_update_bufs(serv); serv->sv_maxconn = nlm_max_connections; -- cgit v1.2.3 From 2317dc557a3b6d5b73b697034611d658eb2cbde9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Nov 2017 10:19:35 +0300 Subject: race of nfsd inetaddr notifiers vs nn->nfsd_serv change nfsd_inet[6]addr_event uses nn->nfsd_serv without taking nfsd_mutex, which can be changed during execution of notifiers and crash the host. Moreover if notifiers were enabled in one net namespace they are enabled in all other net namespaces, from creation until destruction. This patch allows notifiers to access nn->nfsd_serv only after the pointer is correctly initialized and delays cleanup until notifiers are no longer in use. Signed-off-by: Vasily Averin Tested-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 3 +++ fs/nfsd/nfsctl.c | 3 +++ fs/nfsd/nfssvc.c | 14 +++++++++++--- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1c91391f4805..36358d435cb0 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -119,6 +119,9 @@ struct nfsd_net { u32 clverifier_counter; struct svc_serv *nfsd_serv; + + wait_queue_head_t ntf_wq; + atomic_t ntf_refcnt; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6493df6b1bd5..d107b4426f7e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1241,6 +1241,9 @@ static __net_init int nfsd_init_net(struct net *net) nn->nfsd4_grace = 90; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); + + atomic_set(&nn->ntf_refcnt, 0); + init_waitqueue_head(&nn->ntf_wq); return 0; out_idmap_error: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 33117d4ffce0..89cb484f1cfb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -335,7 +335,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nn->ntf_refcnt)) goto out; if (nn->nfsd_serv) { @@ -344,6 +345,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } + atomic_dec(&nn->ntf_refcnt); + wake_up(&nn->ntf_wq); out: return NOTIFY_DONE; @@ -363,7 +366,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nn->ntf_refcnt)) goto out; if (nn->nfsd_serv) { @@ -374,7 +378,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } - + atomic_dec(&nn->ntf_refcnt); + wake_up(&nn->ntf_wq); out: return NOTIFY_DONE; } @@ -391,6 +396,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + atomic_dec(&nn->ntf_refcnt); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -398,6 +404,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } + wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0); /* * write_ports can create the server without actually starting @@ -517,6 +524,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } + atomic_inc(&nn->ntf_refcnt); ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */ return 0; } -- cgit v1.2.3 From 9e137ed5abcb21c0efffb1b6a7c48f2d21980303 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 30 Oct 2017 16:47:58 +0300 Subject: nlm_shutdown_hosts_net() cleanup nlm_complain_hosts() walks through nlm_server_hosts hlist, which should be protected by nlm_host_mutex. Signed-off-by: Vasily Averin Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f90f6d5082ef..826a89184f90 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -621,9 +621,8 @@ nlm_shutdown_hosts_net(struct net *net) /* Then, perform a garbage collection pass */ nlm_gc_hosts(net); - mutex_unlock(&nlm_host_mutex); - nlm_complain_hosts(net); + mutex_unlock(&nlm_host_mutex); } /* -- cgit v1.2.3 From 81833de1a46edce9ca20cfe079872ac1c20ef359 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 13 Nov 2017 07:25:40 +0300 Subject: lockd: fix "list_add double add" caused by legacy signal interface restart_grace() uses hardcoded init_net. It can cause to "list_add double add" in following scenario: 1) nfsd and lockd was started in several net namespaces 2) nfsd in init_net was stopped (lockd was not stopped because it have users from another net namespaces) 3) lockd got signal, called restart_grace() -> set_grace_period() and enabled lock_manager in hardcoded init_net. 4) nfsd in init_net is started again, its lockd_up() calls set_grace_period() and tries to add lock_manager into init_net 2nd time. Jeff Layton suggest: "Make it safe to call locks_start_grace multiple times on the same lock_manager. If it's already on the global grace_list, then don't try to add it again. (But we don't intentionally add twice, so for now we WARN about that case.) With this change, we also need to ensure that the nfsd4 lock manager initializes the list before we call locks_start_grace. While we're at it, move the rest of the nfsd_net initialization into nfs4_state_create_net. I see no reason to have it spread over two functions like it is today." Suggested patch was updated to generate warning in described situation. Suggested-by: Jeff Layton Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields --- fs/nfs_common/grace.c | 6 +++++- fs/nfsd/nfs4state.c | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index bd3e2d328e64..5be08f02a76b 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm) struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); - list_add(&lm->list, grace_list); + if (list_empty(&lm->list)) + list_add(&lm->list, grace_list); + else + WARN(1, "double list_add attempt detected in net %x %s\n", + net->ns.inum, (net == &init_net) ? "(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 73451436c1c2..b29b5a185a2c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7103,6 +7103,10 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; + nn->boot_time = get_seconds(); + nn->grace_ended = false; + nn->nfsd4_manager.block_opens = true; + INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); @@ -7160,9 +7164,6 @@ nfs4_state_start_net(struct net *net) ret = nfs4_state_create_net(net); if (ret) return ret; - nn->boot_time = get_seconds(); - nn->grace_ended = false; - nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", -- cgit v1.2.3 From c30bf8cecebabb119aa1fbb17de84f924a8ab8f7 Mon Sep 17 00:00:00 2001 From: Gustavo A R Silva Date: Wed, 18 Oct 2017 15:34:25 -0500 Subject: i40e/virtchnl: fix application of sizeof to pointer sizeof when applied to a pointer typed expression gives the size of the pointer. The proper fix in this particular case is to code sizeof(*vfres) instead of sizeof(vfres). This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A R Silva Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index a3dc9b932946..36cb8e068e85 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2086,7 +2086,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) } return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, - (u8 *)vfres, sizeof(vfres)); + (u8 *)vfres, sizeof(*vfres)); } /** -- cgit v1.2.3 From c0f4b163a03e73055dd734eaca64b9580e72e7fb Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 6 Nov 2017 08:31:59 +0200 Subject: e1000e: fix the use of magic numbers for buffer overrun issue This is a follow on to commit b10effb92e27 ("fix buffer overrun while the I219 is processing DMA transactions") to address David Laights concerns about the use of "magic" numbers. So define masks as well as add additional code comments to give a better understanding of what needs to be done to avoid a buffer overrun. Signed-off-by: Sasha Neftin Reviewed-by: Alexander H Duyck Reviewed-by: Dima Ruinskiy Reviewed-by: Raanan Avargil Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 ++- drivers/net/ethernet/intel/e1000e/netdev.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 67163ca898ba..00a36df02a3f 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -113,7 +113,8 @@ #define NVM_SIZE_MULTIPLIER 4096 /*multiplier for NVMS field */ #define E1000_FLASH_BASE_ADDR 0xE000 /*offset of NVM access regs */ #define E1000_CTRL_EXT_NVMVS 0x3 /*NVM valid sector */ -#define E1000_TARC0_CB_MULTIQ_3_REQ (1 << 28 | 1 << 29) +#define E1000_TARC0_CB_MULTIQ_3_REQ 0x30000000 +#define E1000_TARC0_CB_MULTIQ_2_REQ 0x20000000 #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index f2f49239b015..9f18d39bdc8f 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3034,9 +3034,12 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) ew32(IOSFPC, reg_val); reg_val = er32(TARC(0)); - /* SPT and KBL Si errata workaround to avoid Tx hang */ - reg_val &= ~BIT(28); - reg_val |= BIT(29); + /* SPT and KBL Si errata workaround to avoid Tx hang. + * Dropping the number of outstanding requests from + * 3 to 2 in order to avoid a buffer overrun. + */ + reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ; + reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ; ew32(TARC(0), reg_val); } } -- cgit v1.2.3 From 80752a98a0a0f666b263fecea327d4018b3f36f9 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 17 Nov 2017 15:35:57 -0800 Subject: i40e: Fix reporting incorrect error codes Adding cloud filters could fail for a number of reasons, unsupported filter fields for example, which fails during validation of fields itself. This will not result in admin command errors and converting the admin queue status to posix error code using i40e_aq_rc_to_posix would result in incorrect error values. If the failure was due to AQ error itself, reporting that correctly is handled in the inner function. Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4c08cc86463e..321d8be80871 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7401,7 +7401,6 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %s\n", i40e_stat_str(&pf->hw, err)); - err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status); goto err; } -- cgit v1.2.3 From 8abd20b458cafd4c4e632fe7fb8f1a9861b02ee0 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Sat, 18 Nov 2017 21:53:58 +0100 Subject: e1000: Fix off-by-one in debug message Signed-off-by: Ahmad Fatoum Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_hw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 8172cf08cc33..3bac9df1c099 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -4307,8 +4307,10 @@ static void e1000_init_rx_addrs(struct e1000_hw *hw) rar_num = E1000_RAR_ENTRIES; - /* Zero out the other 15 receive addresses. */ - e_dbg("Clearing RAR[1-15]\n"); + /* Zero out the following 14 receive addresses. RAR[15] is for + * manageability + */ + e_dbg("Clearing RAR[1-14]\n"); for (i = 1; i < rar_num; i++) { E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); E1000_WRITE_FLUSH(); -- cgit v1.2.3 From b12cbb21586277f72533769832c24cc6c1d60ab3 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 22 Nov 2017 07:33:38 -0800 Subject: apparmor: fix oops in audit_signal_cb hook The apparmor_audit_data struct ordering got messed up during a merge conflict, resulting in the signal integer and peer pointer being in a union instead of a struct. For most of the 4.13 and 4.14 life cycle, this was hidden by commit 651e28c5537a ("apparmor: add base infastructure for socket mediation") which fixed the apparmor_audit_data struct when its data was added. When that commit was reverted in -rc7 the signal audit bug was exposed, and unfortunately it never showed up in any of the testing until after 4.14 was released. Shaun Khan, Zephaniah E. Loss-Cutler-Hull filed nearly simultaneous bug reports (with different oopes, the smaller of which is included below). Full credit goes to Tetsuo Handa for jumping on this as well and noticing the audit data struct problem and reporting it. [ 76.178568] BUG: unable to handle kernel paging request at ffffffff0eee3bc0 [ 76.178579] IP: audit_signal_cb+0x6c/0xe0 [ 76.178581] PGD 1a640a067 P4D 1a640a067 PUD 0 [ 76.178586] Oops: 0000 [#1] PREEMPT SMP [ 76.178589] Modules linked in: fuse rfcomm bnep usblp uvcvideo btusb btrtl btbcm btintel bluetooth ecdh_generic ip6table_filter ip6_tables xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables intel_rapl joydev wmi_bmof serio_raw iwldvm iwlwifi shpchp kvm_intel kvm irqbypass autofs4 algif_skcipher nls_iso8859_1 nls_cp437 crc32_pclmul ghash_clmulni_intel [ 76.178620] CPU: 0 PID: 10675 Comm: pidgin Not tainted 4.14.0-f1-dirty #135 [ 76.178623] Hardware name: Hewlett-Packard HP EliteBook Folio 9470m/18DF, BIOS 68IBD Ver. F.62 10/22/2015 [ 76.178625] task: ffff9c7a94c31dc0 task.stack: ffffa09b02a4c000 [ 76.178628] RIP: 0010:audit_signal_cb+0x6c/0xe0 [ 76.178631] RSP: 0018:ffffa09b02a4fc08 EFLAGS: 00010292 [ 76.178634] RAX: ffffa09b02a4fd60 RBX: ffff9c7aee0741f8 RCX: 0000000000000000 [ 76.178636] RDX: ffffffffee012290 RSI: 0000000000000006 RDI: ffff9c7a9493d800 [ 76.178638] RBP: ffffa09b02a4fd40 R08: 000000000000004d R09: ffffa09b02a4fc46 [ 76.178641] R10: ffffa09b02a4fcb8 R11: ffff9c7ab44f5072 R12: ffffa09b02a4fd40 [ 76.178643] R13: ffffffff9e447be0 R14: ffff9c7a94c31dc0 R15: 0000000000000001 [ 76.178646] FS: 00007f8b11ba2a80(0000) GS:ffff9c7afea00000(0000) knlGS:0000000000000000 [ 76.178648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 76.178650] CR2: ffffffff0eee3bc0 CR3: 00000003d5209002 CR4: 00000000001606f0 [ 76.178652] Call Trace: [ 76.178660] common_lsm_audit+0x1da/0x780 [ 76.178665] ? d_absolute_path+0x60/0x90 [ 76.178669] ? aa_check_perms+0xcd/0xe0 [ 76.178672] aa_check_perms+0xcd/0xe0 [ 76.178675] profile_signal_perm.part.0+0x90/0xa0 [ 76.178679] aa_may_signal+0x16e/0x1b0 [ 76.178686] apparmor_task_kill+0x51/0x120 [ 76.178690] security_task_kill+0x44/0x60 [ 76.178695] group_send_sig_info+0x25/0x60 [ 76.178699] kill_pid_info+0x36/0x60 [ 76.178703] SYSC_kill+0xdb/0x180 [ 76.178707] ? preempt_count_sub+0x92/0xd0 [ 76.178712] ? _raw_write_unlock_irq+0x13/0x30 [ 76.178716] ? task_work_run+0x6a/0x90 [ 76.178720] ? exit_to_usermode_loop+0x80/0xa0 [ 76.178723] entry_SYSCALL_64_fastpath+0x13/0x94 [ 76.178727] RIP: 0033:0x7f8b0e58b767 [ 76.178729] RSP: 002b:00007fff19efd4d8 EFLAGS: 00000206 ORIG_RAX: 000000000000003e [ 76.178732] RAX: ffffffffffffffda RBX: 0000557f3e3c2050 RCX: 00007f8b0e58b767 [ 76.178735] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000000263b [ 76.178737] RBP: 0000000000000000 R08: 0000557f3e3c2270 R09: 0000000000000001 [ 76.178739] R10: 000000000000022d R11: 0000000000000206 R12: 0000000000000000 [ 76.178741] R13: 0000000000000001 R14: 0000557f3e3c13c0 R15: 0000000000000000 [ 76.178745] Code: 48 8b 55 18 48 89 df 41 b8 20 00 08 01 5b 5d 48 8b 42 10 48 8b 52 30 48 63 48 4c 48 8b 44 c8 48 31 c9 48 8b 70 38 e9 f4 fd 00 00 <48> 8b 14 d5 40 27 e5 9e 48 c7 c6 7d 07 19 9f 48 89 df e8 fd 35 [ 76.178794] RIP: audit_signal_cb+0x6c/0xe0 RSP: ffffa09b02a4fc08 [ 76.178796] CR2: ffffffff0eee3bc0 [ 76.178799] ---[ end trace 514af9529297f1a3 ]--- Fixes: cd1dbf76b23d ("apparmor: add the ability to mediate signals") Reported-by: Zephaniah E. Loss-Cutler-Hull Reported-by: Shuah Khan Suggested-by: Tetsuo Handa Tested-by: Ivan Kozik Tested-by: Zephaniah E. Loss-Cutler-Hull Tested-by: Christian Boltz Tested-by: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: John Johansen --- security/apparmor/include/audit.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 620e81169659..4ac095118717 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -121,17 +121,19 @@ struct apparmor_audit_data { /* these entries require a custom callback fn */ struct { struct aa_label *peer; - struct { - const char *target; - kuid_t ouid; - } fs; + union { + struct { + const char *target; + kuid_t ouid; + } fs; + int signal; + }; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; - int signal; struct { int rlim; unsigned long max; -- cgit v1.2.3 From 8f5abe842e84ba9e72485ddd9dc02a3562b54e2a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 27 Nov 2017 16:45:56 -0800 Subject: proc: don't report kernel addresses in /proc//stack This just changes the file to report them as zero, although maybe even that could be removed. I checked, and at least procps doesn't actually seem to parse the 'stack' file at all. And since the file doesn't necessarily even exist (it requires CONFIG_STACKTRACE), possibly other tools don't really use it either. That said, in case somebody parses it with tools, just having that zero there should keep such tools happy. Signed-off-by: Linus Torvalds --- fs/proc/base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 31934cb9dfc8..28fa85276eec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -443,8 +443,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, save_stack_trace_tsk(task, &trace); for (i = 0; i < trace.nr_entries; i++) { - seq_printf(m, "[<%pK>] %pB\n", - (void *)entries[i], (void *)entries[i]); + seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); } unlock_trace(task); } -- cgit v1.2.3 From bd467e4eababe4c04272c1e646f066db02734c79 Mon Sep 17 00:00:00 2001 From: Robert Lippert Date: Mon, 27 Nov 2017 15:51:55 -0800 Subject: hwmon: (pmbus) Use 64bit math for DIRECT format values Power values in the 100s of watt range can easily blow past 32bit math limits when processing everything in microwatts. Use 64bit math instead to avoid these issues on common 32bit ARM BMC platforms. Fixes: 442aba78728e ("hwmon: PMBus device driver") Signed-off-by: Robert Lippert Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 52a58b8b6e1b..a139940cd991 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -499,8 +500,8 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, static long pmbus_reg2data_direct(struct pmbus_data *data, struct pmbus_sensor *sensor) { - long val = (s16) sensor->data; - long m, b, R; + s64 b, val = (s16)sensor->data; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -528,11 +529,12 @@ static long pmbus_reg2data_direct(struct pmbus_data *data, R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val = div_s64(val + 5LL, 10L); /* round closest */ R++; } - return (val - b) / m; + val = div_s64(val - b, m); + return clamp_val(val, LONG_MIN, LONG_MAX); } /* @@ -656,7 +658,8 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, static u16 pmbus_data2reg_direct(struct pmbus_data *data, struct pmbus_sensor *sensor, long val) { - long m, b, R; + s64 b, val64 = val; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -673,18 +676,18 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } - val = val * m + b; + val64 = val64 * m + b; while (R > 0) { - val *= 10; + val64 *= 10; R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val64 = div_s64(val64 + 5LL, 10L); /* round closest */ R++; } - return val; + return (u16)clamp_val(val64, S16_MIN, S16_MAX); } static u16 pmbus_data2reg_vid(struct pmbus_data *data, -- cgit v1.2.3 From ae5c631e605a452a5a0e73205a92810c01ed954b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Nov 2017 21:41:56 +0200 Subject: drm/i915: Don't try indexed reads to alternate slave addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can only specify the one slave address to indexed reads/writes. Make sure the messages we check are destined to the same slave address before deciding to do an indexed transfer. Cc: stable@vger.kernel.org Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit c4deb62d7821672265b87952bcd1c808f3bf3e8f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index eb5827110d8f..8affd47b98b8 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -438,6 +438,7 @@ static bool gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && + msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && (msgs[i + 1].flags & I2C_M_RD)); } -- cgit v1.2.3 From 56350fb8978bbf4aafe08f21234e161dd128b417 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Nov 2017 21:41:57 +0200 Subject: drm/i915: Prevent zero length "index" write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware always writes one or two bytes in the index portion of an indexed transfer. Make sure the message we send as the index doesn't have a zero length. Cc: stable@vger.kernel.org Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit bb9e0d4bca50f429152e74a459160b41f3d60fb2) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_i2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 8affd47b98b8..49fdf09f9919 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -439,7 +439,8 @@ gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && - !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && + !(msgs[i].flags & I2C_M_RD) && + (msgs[i].len == 1 || msgs[i].len == 2) && (msgs[i + 1].flags & I2C_M_RD)); } -- cgit v1.2.3 From a45b30a6c5db631e2ba680304bd5edd0cd1f9643 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Nov 2017 19:41:55 +0000 Subject: drm/i915/fbdev: Serialise early hotplug events with async fbdev config As both the hotplug event and fbdev configuration run asynchronously, it is possible for them to run concurrently. If configuration fails, we were freeing the fbdev causing a use-after-free in the hotplug event. <7>[ 3069.935211] [drm:intel_fb_initial_config [i915]] Not using firmware configuration <7>[ 3069.935225] [drm:drm_setup_crtcs] looking for cmdline mode on connector 77 <7>[ 3069.935229] [drm:drm_setup_crtcs] looking for preferred mode on connector 77 0 <7>[ 3069.935233] [drm:drm_setup_crtcs] found mode 3200x1800 <7>[ 3069.935236] [drm:drm_setup_crtcs] picking CRTCs for 8192x8192 config <7>[ 3069.935253] [drm:drm_setup_crtcs] desired mode 3200x1800 set on crtc 43 (0,0) <7>[ 3069.935323] [drm:intelfb_create [i915]] no BIOS fb, allocating a new one <4>[ 3069.967737] general protection fault: 0000 [#1] PREEMPT SMP <0>[ 3069.977453] --------------------------------- <4>[ 3069.977457] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm r8169 mei_me mii prime_numbers mei i2c_hid pinctrl_geminilake pinctrl_intel [last unloaded: i915] <4>[ 3069.977492] CPU: 1 PID: 15414 Comm: kworker/1:0 Tainted: G U 4.14.0-CI-CI_DRM_3388+ #1 <4>[ 3069.977497] Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 <4>[ 3069.977508] Workqueue: events output_poll_execute <4>[ 3069.977512] task: ffff880177734e40 task.stack: ffffc90001fe4000 <4>[ 3069.977519] RIP: 0010:__lock_acquire+0x109/0x1b60 <4>[ 3069.977523] RSP: 0018:ffffc90001fe7bb0 EFLAGS: 00010002 <4>[ 3069.977526] RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000282 RCX: 0000000000000000 <4>[ 3069.977530] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880170d4efd0 <4>[ 3069.977534] RBP: ffffc90001fe7c70 R08: 0000000000000001 R09: 0000000000000000 <4>[ 3069.977538] R10: 0000000000000000 R11: ffffffff81899609 R12: ffff880170d4efd0 <4>[ 3069.977542] R13: ffff880177734e40 R14: 0000000000000001 R15: 0000000000000000 <4>[ 3069.977547] FS: 0000000000000000(0000) GS:ffff88017fc80000(0000) knlGS:0000000000000000 <4>[ 3069.977551] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 3069.977555] CR2: 00007f7e8b7bcf04 CR3: 0000000003e0f000 CR4: 00000000003406e0 <4>[ 3069.977559] Call Trace: <4>[ 3069.977565] ? mark_held_locks+0x64/0x90 <4>[ 3069.977571] ? _raw_spin_unlock_irq+0x24/0x50 <4>[ 3069.977575] ? _raw_spin_unlock_irq+0x24/0x50 <4>[ 3069.977579] ? trace_hardirqs_on_caller+0xde/0x1c0 <4>[ 3069.977583] ? _raw_spin_unlock_irq+0x2f/0x50 <4>[ 3069.977588] ? finish_task_switch+0xa5/0x210 <4>[ 3069.977592] ? lock_acquire+0xaf/0x200 <4>[ 3069.977596] lock_acquire+0xaf/0x200 <4>[ 3069.977600] ? __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977604] _raw_spin_lock+0x2a/0x40 <4>[ 3069.977608] ? __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977612] __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977616] ? drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977621] ? drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977625] drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977630] output_poll_execute+0x8d/0x180 <4>[ 3069.977635] process_one_work+0x22e/0x660 <4>[ 3069.977640] worker_thread+0x48/0x3a0 <4>[ 3069.977644] ? _raw_spin_unlock_irqrestore+0x4c/0x60 <4>[ 3069.977649] kthread+0x102/0x140 <4>[ 3069.977653] ? process_one_work+0x660/0x660 <4>[ 3069.977657] ? kthread_create_on_node+0x40/0x40 <4>[ 3069.977662] ret_from_fork+0x27/0x40 <4>[ 3069.977666] Code: 8d 62 f8 c3 49 81 3c 24 e0 fa 3c 82 41 be 00 00 00 00 45 0f 45 f0 83 fe 01 77 86 89 f0 49 8b 44 c4 08 48 85 c0 0f 84 76 ff ff ff ff 80 38 01 00 00 8b 1d 62 f9 e8 01 45 8b 85 b8 08 00 00 85 <1>[ 3069.977707] RIP: __lock_acquire+0x109/0x1b60 RSP: ffffc90001fe7bb0 <4>[ 3069.977712] ---[ end trace 4ad012eb3af62df7 ]--- In order to keep the dev_priv->ifbdev alive after failure, we have to avoid the free and leave it empty until we unload the module (which is less than ideal, but a necessary evil for simplicity). Then we can use intel_fbdev_sync() to serialise the hotplug event with the configuration. The serialisation between the two was removed in commit 934458c2c95d ("Revert "drm/i915: Fix races on fbdev""), but the use after free is much older, commit 366e39b4d2c5 ("drm/i915: Tear down fbdev if initialization fails") Fixes: 366e39b4d2c5 ("drm/i915: Tear down fbdev if initialization fails") Fixes: 934458c2c95d ("Revert "drm/i915: Fix races on fbdev"") Signed-off-by: Chris Wilson Cc: Lukas Wunner Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: stable@vger.kernel.org Reviewed-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/20171125194155.355-1-chris@chris-wilson.co.uk (cherry picked from commit ad88d7fc6c032ddfb32b8d496a070ab71de3a64f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index b8af35187d22..ea96682568e8 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -697,10 +697,8 @@ static void intel_fbdev_initial_config(void *data, async_cookie_t cookie) /* Due to peculiar init order wrt to hpd handling this is separate. */ if (drm_fb_helper_initial_config(&ifbdev->helper, - ifbdev->preferred_bpp)) { + ifbdev->preferred_bpp)) intel_fbdev_unregister(to_i915(ifbdev->helper.dev)); - intel_fbdev_fini(to_i915(ifbdev->helper.dev)); - } } void intel_fbdev_initial_config_async(struct drm_device *dev) @@ -800,7 +798,11 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; - if (ifbdev) + if (!ifbdev) + return; + + intel_fbdev_sync(ifbdev); + if (ifbdev->vma) drm_fb_helper_hotplug_event(&ifbdev->helper); } -- cgit v1.2.3 From ac29fc66855b79c2960c63a4a66952d5b721d698 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Nov 2017 16:10:27 +0100 Subject: drm/i915: fix intel_backlight_device_register declaration The alternative intel_backlight_device_register() definition apparently never got used, but I have now run into a case of i915 being compiled without CONFIG_BACKLIGHT_CLASS_DEVICE, resulting in a number of identical warnings: drivers/gpu/drm/i915/intel_drv.h:1739:12: error: 'intel_backlight_device_register' defined but not used [-Werror=unused-function] This marks the function as 'inline', which was surely the original intention here. Fixes: 1ebaa0b9c2d4 ("drm/i915: Move backlight registration to connector registration") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171127151239.1813673-1-arnd@arndb.de (cherry picked from commit 2de2d0b063b08becb2c67a2c338c44e37bdcffee) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7bc60c848940..6c7f8bca574e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1736,7 +1736,7 @@ extern struct drm_display_mode *intel_find_panel_downclock( int intel_backlight_device_register(struct intel_connector *connector); void intel_backlight_device_unregister(struct intel_connector *connector); #else /* CONFIG_BACKLIGHT_CLASS_DEVICE */ -static int intel_backlight_device_register(struct intel_connector *connector) +static inline int intel_backlight_device_register(struct intel_connector *connector) { return 0; } -- cgit v1.2.3 From 762ff4678e89a5e3f8b2237533e04d3ef2737e78 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 28 Nov 2017 12:40:59 +0800 Subject: USB: serial: usb_debug: add new USB device id USB vendor id and product id for Linux USB Debug Target is added. Signed-off-by: Lu Baolu Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/usb_debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c index ab5a2ac4993a..aaf4813e4971 100644 --- a/drivers/usb/serial/usb_debug.c +++ b/drivers/usb/serial/usb_debug.c @@ -31,12 +31,14 @@ static const struct usb_device_id id_table[] = { }; static const struct usb_device_id dbc_id_table[] = { + { USB_DEVICE(0x1d6b, 0x0010) }, { USB_DEVICE(0x1d6b, 0x0011) }, { }, }; static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(0x0525, 0x127a) }, + { USB_DEVICE(0x1d6b, 0x0010) }, { USB_DEVICE(0x1d6b, 0x0011) }, { }, }; -- cgit v1.2.3 From 679fd3ebabc23dec33d42525b19479f16f355e61 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Nov 2017 14:58:31 +0800 Subject: drm/i915/gvt: Fix unsafe locking caused by spin_unlock_bh The caller of shadow_context_status_change may disable irqs. So it is not safe to use spin_unlock_bh in such context. Let's switch to irqsave version for safety. ------------[ cut here ]------------ WARNING: CPU: 2 PID: 4504 at kernel/softirq.c:161 __local_bh_enable_ip+0x46/0x60 [ 168.797710] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.2.8 01/26/2016 [ 168.797712] task: ffff8c693d22db80 task.stack: ffffb51b482bc000 [ 168.797718] RIP: 0010:__local_bh_enable_ip+0x46/0x60 [ 168.797721] RSP: 0018:ffffb51b482bfa10 EFLAGS: 00010046 [ 168.797724] RAX: 0000000000000046 RBX: ffff8c6900278000 RCX: 00000000ffffffff [ 168.797726] RDX: 0000000000000001 RSI: 0000000000000200 RDI: ffffffffc06a0330 [ 168.797728] RBP: ffffb51b482bfa10 R08: 0000000000000000 R09: ffff8c690027cb90 [ 168.797730] R10: ffffb51b482bfa40 R11: 00000004072f0001 R12: 0000000000000000 [ 168.797732] R13: 0000000000000000 R14: ffff8c690027ca9c R15: 0000000000000000 [ 168.797735] FS: 00007ff187c56700(0000) GS:ffff8c6959d00000(0000) knlGS:0000000000000000 [ 168.797738] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 168.797740] CR2: 0000562bc0c3991f CR3: 0000000430614006 CR4: 00000000003606e0 [ 168.797742] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 168.797744] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 168.797745] Call Trace: [ 168.797755] _raw_spin_unlock_bh+0x1e/0x20 [ 168.797826] shadow_context_status_change+0x120/0x1e0 [i915] [ 168.797831] notifier_call_chain+0x4a/0x70 [ 168.797834] atomic_notifier_call_chain+0x1a/0x20 [ 168.797896] execlists_cancel_port_requests+0x4f/0x80 [i915] [ 168.797956] reset_common_ring+0x30/0x100 [i915] [ 168.798007] i915_gem_reset_engine+0x114/0x330 [i915] [ 168.798060] ? i915_gem_retire_requests+0x75/0x180 [i915] [ 168.798111] i915_gem_reset+0x3e/0xb0 [i915] [ 168.798149] i915_reset+0x10b/0x1c0 [i915] [ 168.798187] i915_reset_device+0x209/0x220 [i915] [ 168.798225] ? gen8_gt_irq_ack+0x170/0x170 [i915] [ 168.798229] ? __queue_work+0x430/0x430 [ 168.798270] i915_handle_error+0x285/0x420 [i915] [ 168.798275] ? mntput+0x24/0x40 [ 168.798281] ? terminate_walk+0x8e/0xf0 [ 168.798328] i915_wedged_set+0x84/0xc0 [i915] [ 168.798333] simple_attr_write+0xab/0xc0 [ 168.798337] full_proxy_write+0x54/0x90 [ 168.798343] __vfs_write+0x37/0x170 [ 168.798349] ? common_file_perm+0x4c/0x100 [ 168.798355] ? apparmor_file_permission+0x1a/0x20 [ 168.798361] ? security_file_permission+0x3b/0xc0 [ 168.798365] vfs_write+0xb8/0x1b0 [ 168.798370] SyS_write+0x55/0xc0 [ 168.798376] entry_SYSCALL_64_fastpath+0x1e/0xa9 Fixes: 0e86cc9 ("drm/i915/gvt: implement per-vm mmio switching optimization") Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f6ded475bb2c..f031234290f6 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -140,9 +140,10 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; enum intel_engine_id ring_id = req->engine->id; struct intel_vgpu_workload *workload; + unsigned long flags; if (!is_gvt_request(req)) { - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (action == INTEL_CONTEXT_SCHEDULE_IN && scheduler->engine_owner[ring_id]) { /* Switch ring from vGPU to host. */ @@ -150,7 +151,7 @@ static int shadow_context_status_change(struct notifier_block *nb, NULL, ring_id); scheduler->engine_owner[ring_id] = NULL; } - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); return NOTIFY_OK; } @@ -161,7 +162,7 @@ static int shadow_context_status_change(struct notifier_block *nb, switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (workload->vgpu != scheduler->engine_owner[ring_id]) { /* Switch ring from host to vGPU or vGPU to vGPU. */ intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], @@ -170,7 +171,7 @@ static int shadow_context_status_change(struct notifier_block *nb, } else gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n", ring_id, workload->vgpu->id); - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: -- cgit v1.2.3 From bf3a26b3cba59e2eee91cec3de6fdb4e8e670295 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Tue, 21 Nov 2017 10:54:41 +0800 Subject: drm/i915/gvt: remove skl_misc_ctl_write handler With different settings of compressed data hash mode between VMs and host may cause gpu issues. Commit: 1999f108c ("drm/i915/gvt: Disable compression workaround for Gen9") disable compression workaround of guest in gvt host to align with host. Commit: 93564044f ("drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS") add compression workaround, then we can remove the skl_misc_ctl_write hanlder. Better solution should be always keeping same settings as host, and bypass the write request from VMs, but it need to fetch data from host's "Context". Cc: Zhi Wang Signed-off-by: Weinan Li Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 45 +++++-------------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a5bed2e71b92..44cd5ff5e97d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1381,40 +1381,6 @@ static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes); } -static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u32 v = *(u32 *)p_data; - - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) - return intel_vgpu_default_mmio_write(vgpu, - offset, p_data, bytes); - - switch (offset) { - case 0x4ddc: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 31); - break; - case 0x42080: - /* bypass WaCompressedResourceDisplayNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 15); - break; - case 0xe194: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 8); - break; - case 0x7014: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 13); - break; - default: - return -EINVAL; - } - - return 0; -} - static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1671,8 +1637,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2564,8 +2530,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x6e570, D_BDW_PLUS); MMIO_D(0x65f10, D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2615,8 +2580,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL); + MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL); MMIO_D(0x45504, D_SKL_PLUS); MMIO_D(0x45520, D_SKL_PLUS); MMIO_D(0x46000, D_SKL_PLUS); -- cgit v1.2.3 From c3c80f0736bb2767fab983d1ae53252b2ddd405d Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 14 Nov 2017 17:09:35 +0800 Subject: drm/i915/gvt: Move request alloc to dispatch_workload path only Previously the performance is improved through the workload auditing and shadowing ahead of vGPU scheduling, however, there is the case that more requests are allocated in submit_context before the previous request is added, the timeline will hold its seqno which is later. This patch is to move the request alloc to dispatch_workload function, where is the same place as request is added. It will fix the issue of kernel BUG for (timeline->seqno != request->fence.seqno) check when add_request. Fixes: 89ea20b930cb ("drm/i915/gvt: Factor out scan and shadow from workload dispatch") Signed-off-by: Chuanxiao Dong Signed-off-by: fred gao Signed-off-by: Zhenyu Wang (cherry picked from commit f2880e04f3a5419366926182fc97a3c2e4fd8f2a) --- drivers/gpu/drm/i915/gvt/execlist.c | 6 ++++++ drivers/gpu/drm/i915/gvt/scheduler.c | 24 ++++++++++++++++++++---- drivers/gpu/drm/i915/gvt/scheduler.h | 3 +++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 4427be18e4a9..940cdaaa3f24 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -496,6 +496,12 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) goto err_unpin_mm; } + ret = intel_gvt_generate_request(workload); + if (ret) { + gvt_vgpu_err("fail to generate request\n"); + goto err_unpin_mm; + } + ret = prepare_shadow_batch_buffer(workload); if (ret) { gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f031234290f6..3ac1dc97a7a0 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -254,7 +254,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_ring *ring; int ret; @@ -300,6 +299,26 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = populate_shadow_context(workload); if (ret) goto err_unpin; + workload->shadowed = true; + return 0; + +err_unpin: + engine->context_unpin(engine, shadow_ctx); +err_shadow: + release_shadow_wa_ctx(&workload->wa_ctx); +err_scan: + return ret; +} + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload) +{ + int ring_id = workload->ring_id; + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; + struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; + int ret; rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { @@ -314,14 +333,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; - workload->shadowed = true; return 0; err_unpin: engine->context_unpin(engine, shadow_ctx); -err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: return ret; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2d694f6c0907..b9f872204d7e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -142,4 +142,7 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu); void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu); void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload); + #endif -- cgit v1.2.3 From 7e60590208942de856fd43956ccebd19e71d686f Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 16 Nov 2017 16:54:23 +0800 Subject: drm/i915/gvt: enabled pipe A default on creating vgpu when i915 driver unloading, it will shutdown all CRTCs and it will introudce kernel panic when conducting igt drv_module_reload test case under guest environment (bug reported by XENGT-468) as below: BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 IP: intel_edp_backlight_off+0xe/0x7c [i915] RIP: 0010:intel_edp_backlight_off+0xe/0x7c [i915] Call Trace: intel_disable_ddi+0xb3/0xbc [i915] intel_modeset_setup_hw_state+0x654/0xb4c [i915] intel_modeset_init+0x9f1/0xe69 [i915] ? intel_i2c_reset+0x3d/0x40 [i915] ? intel_setup_gmbus+0xba/0x249 [i915] i915_driver_load+0xae5/0xcc0 [i915] i915_pci_probe+0x3a/0x3c [i915] local_pci_probe+0x38/0x7b pci_device_probe+0xec/0x12b driver_probe_device+0x134/0x294 __driver_attach+0x6a/0x8c ? driver_probe_device+0x294/0x294 bus_for_each_dev+0x68/0x80 driver_attach+0x19/0x1b bus_add_driver+0xea/0x1d3 ? 0xffffffffa03cd000 driver_register+0x85/0xc1 ? 0xffffffffa03cd000 __pci_register_driver+0x55/0x57 i915_init+0x57/0x5a [i915] do_one_initcall+0x8a/0x12e ? __vunmap+0x8d/0x93 ? kmem_cache_alloc_trace+0x96/0x11c do_init_module+0x5a/0x1e1 in this case, active connector detected but no active pipe available, so it will hang to disable connector. to fix, on vgpu creating, to report active pipe available for guest. Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 3c318439a659..355120865efd 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -282,6 +282,7 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -307,6 +308,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); + vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; return 0; } -- cgit v1.2.3 From b721b65af4eb46df6a1d9e34b14003225e403565 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 28 Nov 2017 07:29:54 +0800 Subject: drm/i915/gvt: Correct ADDR_4K/2M/1G_MASK definition For ADDR_4K_MASK, bit[45..12] should be 1, all other bits should be 0. The current definition wrongly set bit[46] as 1 also. This path fixes this. v2: Add commit message, fixes and cc stable.(Zhenyu) Fixes: 2707e4446688("drm/i915/gvt: vGPU graphics memory virtualization") Signed-off-by: Xiong Zhang Cc: stable@vger.kernel.org Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2801d70579d8..8e331142badb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -311,9 +311,9 @@ static inline int gtt_set_entry64(void *pt, #define GTT_HAW 46 -#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30) -#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21) -#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12) +#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30) +#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21) +#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12) static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { -- cgit v1.2.3 From b2fc059fa549fe6881d4c1f8d698b0f50bcd16ec Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2017 16:18:28 +0000 Subject: usb: gadget: don't dereference g until after it has been null checked Avoid dereferencing pointer g until after g has been sanity null checked; move the assignment of cdev much later when it is required into a more local scope. Detected by CoverityScan, CID#1222135 ("Dereference before null check") Fixes: b785ea7ce662 ("usb: gadget: composite: fix ep->maxburst initialization") Signed-off-by: Colin Ian King Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index eec14e6ed20b..77c7ecca816a 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -146,7 +146,6 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep) { - struct usb_composite_dev *cdev = get_gadget_data(g); struct usb_endpoint_descriptor *chosen_desc = NULL; struct usb_descriptor_header **speed_desc = NULL; @@ -226,8 +225,12 @@ ep_found: _ep->maxburst = comp_desc->bMaxBurst + 1; break; default: - if (comp_desc->bMaxBurst != 0) + if (comp_desc->bMaxBurst != 0) { + struct usb_composite_dev *cdev; + + cdev = get_gadget_data(g); ERROR(cdev, "ep0 bMaxBurst must be 0\n"); + } _ep->maxburst = 1; break; } -- cgit v1.2.3 From a58204ab91ad8cae4d8474aa0ba5d1fc504860c9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 13 Nov 2017 17:59:18 +0900 Subject: usb: gadget: udc: renesas_usb3: fix number of the pipes This controller on R-Car Gen3 has 6 pipes that included PIPE 0 for control actually. But, the datasheet has error in writing as it has 31 pipes. (However, the previous code defined 30 pipes wrongly...) Anyway, this patch fixes it. Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index bc37f40baacf..6e87af248367 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -252,7 +252,7 @@ #define USB3_EP0_SS_MAX_PACKET_SIZE 512 #define USB3_EP0_HSFS_MAX_PACKET_SIZE 64 #define USB3_EP0_BUF_SIZE 8 -#define USB3_MAX_NUM_PIPES 30 +#define USB3_MAX_NUM_PIPES 6 /* This includes PIPE 0 */ #define USB3_WAIT_US 3 #define USB3_DMA_NUM_SETTING_AREA 4 /* -- cgit v1.2.3 From 7a9618a22aadffb55027d665491adf466bced61a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 31 Oct 2017 11:03:19 -0700 Subject: usb: gadget: allow to enable legacy drivers without USB_ETH Considerable time ago the legacy gadget menu was added inside the USB_ETH choice. I think this was a mistake and that the legacy gadget menu should have been added after "endchoice" instead of before. Hence this patch. Fixes: commit 8443f2d2b778 ("usb: gadget: Gadget directory cleanup - group legacy gadgets") Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Nicholas Bellinger Cc: Andrzej Pietrasiewicz Cc: linux-usb@vger.kernel.org Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 4 ++-- drivers/usb/gadget/legacy/Kconfig | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 31cce7805eb2..0a19a76645ad 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -508,8 +508,8 @@ choice controller, and the relevant drivers for each function declared by the device. -source "drivers/usb/gadget/legacy/Kconfig" - endchoice +source "drivers/usb/gadget/legacy/Kconfig" + endif # USB_GADGET diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig index a12fb459dbd9..9570bbeced4f 100644 --- a/drivers/usb/gadget/legacy/Kconfig +++ b/drivers/usb/gadget/legacy/Kconfig @@ -13,6 +13,14 @@ # both kinds of controller can also support "USB On-the-Go" (CONFIG_USB_OTG). # +menuconfig USB_GADGET_LEGACY + bool "Legacy USB Gadget Support" + help + Legacy USB gadgets are USB gadgets that do not use the USB gadget + configfs interface. + +if USB_GADGET_LEGACY + config USB_ZERO tristate "Gadget Zero (DEVELOPMENT)" select USB_LIBCOMPOSITE @@ -490,3 +498,5 @@ config USB_G_WEBCAM Say "y" to link the driver statically, or "m" to build a dynamically linked module called "g_webcam". + +endif -- cgit v1.2.3 From a4f0927ef588cf62bb864707261482c874352942 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 31 Oct 2017 15:56:29 +0200 Subject: usb: gadget: core: Fix ->udc_set_speed() speed handling Currently UDC core calls ->udc_set_speed() with the speed parameter containing the maximum speed supported by the gadget function driver. This might very well be more than that supported by the UDC controller driver. Select the lesser of the 2 speeds so both UDC and gadget function driver are operating within limits. This fixes PHY Erratic errors and 2 second enumeration delay on TI's AM437x platforms. Fixes: 6099eca796ae ("usb: gadget: core: introduce ->udc_set_speed() method") Cc: # v4.13+ Reported-by: Dylan Howey Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 61422d624ad0..93eff7dec2f5 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1069,8 +1069,12 @@ static inline void usb_gadget_udc_stop(struct usb_udc *udc) static inline void usb_gadget_udc_set_speed(struct usb_udc *udc, enum usb_device_speed speed) { - if (udc->gadget->ops->udc_set_speed) - udc->gadget->ops->udc_set_speed(udc->gadget, speed); + if (udc->gadget->ops->udc_set_speed) { + enum usb_device_speed s; + + s = min(speed, udc->gadget->max_speed); + udc->gadget->ops->udc_set_speed(udc->gadget, s); + } } /** -- cgit v1.2.3 From a3acc696085e112733d191a77b106e67a4fa110b Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 27 Nov 2017 18:15:40 +0000 Subject: usb: f_fs: Force Reserved1=1 in OS_DESC_EXT_COMPAT The specification says that the Reserved1 field in OS_DESC_EXT_COMPAT must have the value "1", but when this feature was first implemented we rejected any non-zero values. This was adjusted to accept all non-zero values (while now rejecting zero) in commit 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 of OS_DESC_EXT_COMPAT"), but that breaks any userspace programs that worked previously by returning EINVAL when Reserved1 == 0 which was previously the only value that succeeded! If we just set the field to "1" ourselves, both old and new userspace programs continue to work correctly and, as a bonus, old programs are now compliant with the specification without having to fix anything themselves. Fixes: 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 of OS_DESC_EXT_COMPAT") Cc: Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 9aa457b53e01..b6cf5ab5a0a1 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2282,9 +2282,18 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, int i; if (len < sizeof(*d) || - d->bFirstInterfaceNumber >= ffs->interfaces_count || - !d->Reserved1) + d->bFirstInterfaceNumber >= ffs->interfaces_count) return -EINVAL; + if (d->Reserved1 != 1) { + /* + * According to the spec, Reserved1 must be set to 1 + * but older kernels incorrectly rejected non-zero + * values. We fix it here to avoid returning EINVAL + * in response to values we used to accept. + */ + pr_debug("usb_ext_compat_desc::Reserved1 forced to 1\n"); + d->Reserved1 = 1; + } for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) return -EINVAL; -- cgit v1.2.3 From 02d69c0359f7069c626cef6541f94a3a22dc5ed9 Mon Sep 17 00:00:00 2001 From: Matthew Giassa Date: Sat, 18 Nov 2017 08:26:17 -0800 Subject: staging: comedi: ni_atmio: fix license warning. Resolving license check warning for drivers/staging/comedi. Added the license definitions present in the rest of the module and made sure it's aligned with the license (GPL) in the comments for the affected file (ni_atmio.c). Original warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/staging/comedi//drivers/ni_atmio.o see include/linux/module.h for more information. No longer present after change. Signed-off-by: Matthew Giassa Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_atmio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/staging/comedi/drivers/ni_atmio.c b/drivers/staging/comedi/drivers/ni_atmio.c index 2d62a8c57332..ae6ed96d7874 100644 --- a/drivers/staging/comedi/drivers/ni_atmio.c +++ b/drivers/staging/comedi/drivers/ni_atmio.c @@ -361,3 +361,8 @@ static struct comedi_driver ni_atmio_driver = { .detach = ni_atmio_detach, }; module_comedi_driver(ni_atmio_driver); + +MODULE_AUTHOR("Comedi http://www.comedi.org"); +MODULE_DESCRIPTION("Comedi low-level driver"); +MODULE_LICENSE("GPL"); + -- cgit v1.2.3 From c5f39d07860c35e5e4c63188139465af790f86ce Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 9 Nov 2017 09:16:09 +0000 Subject: staging: ccree: fix leak of import() after init() crypto_ahash_import() may be called either after crypto_ahash_init() or without such call. Right now we always internally call init() as part of import(), thus leaking memory and mappings if the user has already called init() herself. Fix this by only calling init() internally if the state is not already initialized. Fixes: commit 454527d0d94f ("staging: ccree: fix hash import/export") Cc: stable Signed-off-by: Gilad Ben-Yossef Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ccree/ssi_hash.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/staging/ccree/ssi_hash.c b/drivers/staging/ccree/ssi_hash.c index d79090ed7f9c..1799d3f26a9e 100644 --- a/drivers/staging/ccree/ssi_hash.c +++ b/drivers/staging/ccree/ssi_hash.c @@ -1778,9 +1778,12 @@ static int ssi_ahash_import(struct ahash_request *req, const void *in) } in += sizeof(u32); - rc = ssi_hash_init(state, ctx); - if (rc) - goto out; + /* call init() to allocate bufs if the user hasn't */ + if (!state->digest_buff) { + rc = ssi_hash_init(state, ctx); + if (rc) + goto out; + } dma_sync_single_for_cpu(dev, state->digest_buff_dma_addr, ctx->inter_digestsize, DMA_BIDIRECTIONAL); -- cgit v1.2.3 From b77992d2df9e47144354d1b25328b180afa33442 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 25 Nov 2017 13:32:38 -0600 Subject: staging: rtl8188eu: Fix incorrect response to SIOCGIWESSID When not associated with an AP, wifi device drivers should respond to the SIOCGIWESSID ioctl with a zero-length string for the SSID, which is the behavior expected by dhcpcd. Currently, this driver returns an error code (-1) from the ioctl call, which causes dhcpcd to assume that the device is not a wireless interface and therefore it fails to work correctly with it thereafter. This problem was reported and tested at https://github.com/lwfinger/rtl8188eu/issues/234. Signed-off-by: Larry Finger Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index c0664dc80bf2..446310775e90 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -1395,19 +1395,13 @@ static int rtw_wx_get_essid(struct net_device *dev, if ((check_fwstate(pmlmepriv, _FW_LINKED)) || (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE))) { len = pcur_bss->Ssid.SsidLength; - - wrqu->essid.length = len; - memcpy(extra, pcur_bss->Ssid.Ssid, len); - - wrqu->essid.flags = 1; } else { - ret = -1; - goto exit; + len = 0; + *extra = 0; } - -exit: - + wrqu->essid.length = len; + wrqu->essid.flags = 1; return ret; } -- cgit v1.2.3 From 0590cdfead8cd6319cc58b5dbe8ab9fbaabae131 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 17 Nov 2017 09:50:10 +0200 Subject: staging: octeon-usb: use __delay() instead of cvmx_wait() Commit edf188bee1d9 ("MIPS: Octeon: Remove usage of cvmx_wait() everywhere.") deleted cvmx_wait() but forgot to update OCTEON USB staging driver. Fix that to allow driver build. Fixes: edf188bee1d9 ("MIPS: Octeon: Remove usage of cvmx_wait() everywhere.") Signed-off-by: Aaro Koskinen Reviewed-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon-usb/octeon-hcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/octeon-usb/octeon-hcd.c b/drivers/staging/octeon-usb/octeon-hcd.c index 068aece25d37..cded30f145aa 100644 --- a/drivers/staging/octeon-usb/octeon-hcd.c +++ b/drivers/staging/octeon-usb/octeon-hcd.c @@ -394,7 +394,7 @@ struct octeon_hcd { result = -1; \ break; \ } else \ - cvmx_wait(100); \ + __delay(100); \ } \ } while (0); \ result; }) @@ -774,7 +774,7 @@ retry: usbn_clk_ctl.s.hclk_rst = 1; cvmx_write64_uint64(CVMX_USBNX_CLK_CTL(usb->index), usbn_clk_ctl.u64); /* 2e. Wait 64 core-clock cycles for HCLK to stabilize */ - cvmx_wait(64); + __delay(64); /* * 3. Program the power-on reset field in the USBN clock-control * register: @@ -795,7 +795,7 @@ retry: cvmx_write64_uint64(CVMX_USBNX_USBP_CTL_STATUS(usb->index), usbn_usbp_ctl_status.u64); /* 6. Wait 10 cycles */ - cvmx_wait(10); + __delay(10); /* * 7. Clear ATE_RESET field in the USBN clock-control register: * USBN_USBP_CTL_STATUS[ATE_RESET] = 0 -- cgit v1.2.3 From 69fc6cbbac542c349b3d350d10f6e394c253c81d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:24 +0800 Subject: btrfs: tree-checker: Fix false panic for sanity test [BUG] If we run btrfs with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y, it will instantly cause kernel panic like: ------ ... assertion failed: 0, file: fs/btrfs/disk-io.c, line: 3853 ... Call Trace: btrfs_mark_buffer_dirty+0x187/0x1f0 [btrfs] setup_items_for_insert+0x385/0x650 [btrfs] __btrfs_drop_extents+0x129a/0x1870 [btrfs] ... ----- [Cause] Btrfs will call btrfs_check_leaf() in btrfs_mark_buffer_dirty() to check if the leaf is valid with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y. However quite some btrfs_mark_buffer_dirty() callers(*) don't really initialize its item data but only initialize its item pointers, leaving item data uninitialized. This makes tree-checker catch uninitialized data as error, causing such panic. *: These callers include but not limited to setup_items_for_insert() btrfs_split_item() btrfs_expand_item() [Fix] Add a new parameter @check_item_data to btrfs_check_leaf(). With @check_item_data set to false, item data check will be skipped and fallback to old btrfs_check_leaf() behavior. So we can still get early warning if we screw up item pointers, and avoid false panic. Cc: Filipe Manana Reported-by: Lakshmipathi.G Signed-off-by: Qu Wenruo Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 10 ++++++++-- fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++----- fs/btrfs/tree-checker.h | 14 +++++++++++++- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index efce9a2fa9be..10a2a579cc7f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -610,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } @@ -3848,7 +3848,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { + /* + * Since btrfs_mark_buffer_dirty() can be called with item pointer set + * but item data not updated. + * So here we should only check item pointers, not item data. + */ + if (btrfs_header_level(buf) == 0 && + btrfs_check_leaf_relaxed(root, buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 114fc5f0ecc5..ce4ed6ec8f39 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -242,7 +242,8 @@ static int check_leaf_item(struct btrfs_root *root, return ret; } -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) +static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, + bool check_item_data) { struct btrfs_fs_info *fs_info = root->fs_info; /* No valid key type is 0, so all key should be larger than this key */ @@ -361,10 +362,15 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return -EUCLEAN; } - /* Check if the item size and content meet other criteria */ - ret = check_leaf_item(root, leaf, &key, slot); - if (ret < 0) - return ret; + if (check_item_data) { + /* + * Check if the item size and content meet other + * criteria + */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + } prev_key.objectid = key.objectid; prev_key.type = key.type; @@ -374,6 +380,17 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return 0; } +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, true); +} + +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, false); +} + int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) { unsigned long nr = btrfs_header_nritems(node); diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 96c486e95d70..3d53e8d6fda0 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -20,7 +20,19 @@ #include "ctree.h" #include "extent_io.h" -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); +/* + * Comprehensive leaf checker. + * Will check not only the item pointers, but also every possible member + * in item data. + */ +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); + +/* + * Less strict leaf checker. + * Will only check item pointers, not reading item data. + */ +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); #endif -- cgit v1.2.3 From a7c42106ead7041b99662a125b408deb68a3e6aa Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 14 Nov 2017 14:45:27 +0300 Subject: usb: add user selectable option for the whole USB Type-C Support It is more clear from user perspective to wrap the whole USB Type-C support under a single option that the user can select, then it is to always ask the user for every USB Type-C and USB Power Delivery driver separately. Reported-by: Linus Torvalds Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/Kconfig | 54 +++++++++++++++++++++++++++++++++++------- drivers/usb/typec/ucsi/Kconfig | 1 - 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig index 465d7da849c3..bcb2744c5977 100644 --- a/drivers/usb/typec/Kconfig +++ b/drivers/usb/typec/Kconfig @@ -1,13 +1,53 @@ -menu "USB Power Delivery and Type-C drivers" +menuconfig TYPEC + tristate "USB Type-C Support" + help + USB Type-C Specification defines a cable and connector for USB where + only one type of plug is supported on both ends, i.e. there will not + be Type-A plug on one end of the cable and Type-B plug on the other. + Determination of the host-to-device relationship happens through a + specific Configuration Channel (CC) which goes through the USB Type-C + cable. The Configuration Channel may also be used to detect optional + Accessory Modes - Analog Audio and Debug - and if USB Power Delivery + is supported, the Alternate Modes, where the connector is used for + something else then USB communication. + + USB Power Delivery Specification defines a protocol that can be used + to negotiate the voltage and current levels with the connected + partners. USB Power Delivery allows higher voltages then the normal + 5V, up to 20V, and current up to 5A over the cable. The USB Power + Delivery protocol is also used to negotiate the optional Alternate + Modes when they are supported. USB Power Delivery does not depend on + USB Type-C connector, however it is mostly used together with USB + Type-C connectors. + + USB Type-C and USB Power Delivery Specifications define a set of state + machines that need to be implemented in either software or firmware. + Simple USB Type-C PHYs, for example USB Type-C Port Controller + Interface Specification compliant "Port Controllers" need the state + machines to be handled in the OS, but stand-alone USB Type-C and Power + Delivery controllers handle the state machines inside their firmware. + The USB Type-C and Power Delivery controllers usually function + autonomously, and do not necessarily require drivers. + + Enable this configurations option if you have USB Type-C connectors on + your system and 1) you know your USB Type-C hardware requires OS + control (a driver) to function, or 2) if you need to be able to read + the status of the USB Type-C ports in your system, or 3) if you need + to be able to swap the power role (decide are you supplying or + consuming power over the cable) or data role (host or device) when + both roles are supported. + + For more information, see the kernel documentation for USB Type-C + Connector Class API (Documentation/driver-api/usb/typec.rst) + + and ABI (Documentation/ABI/testing/sysfs-class-typec). -config TYPEC - tristate +if TYPEC config TYPEC_TCPM tristate "USB Type-C Port Controller Manager" depends on USB - select TYPEC help The Type-C Port Controller Manager provides a USB PD and USB Type-C state machine for use with Type-C Port Controllers. @@ -22,7 +62,6 @@ config TYPEC_WCOVE depends on INTEL_SOC_PMIC depends on INTEL_PMC_IPC depends on BXT_WC_PMIC_OPREGION - select TYPEC help This driver adds support for USB Type-C detection on Intel Broxton platforms that have Intel Whiskey Cove PMIC. The driver can detect the @@ -31,14 +70,13 @@ config TYPEC_WCOVE To compile this driver as module, choose M here: the module will be called typec_wcove -endif +endif # TYPEC_TCPM source "drivers/usb/typec/ucsi/Kconfig" config TYPEC_TPS6598X tristate "TI TPS6598x USB Power Delivery controller driver" depends on I2C - select TYPEC help Say Y or M here if your system has TI TPS65982 or TPS65983 USB Power Delivery controller. @@ -46,4 +84,4 @@ config TYPEC_TPS6598X If you choose to build this driver as a dynamically linked module, the module will be called tps6598x.ko. -endmenu +endif # TYPEC diff --git a/drivers/usb/typec/ucsi/Kconfig b/drivers/usb/typec/ucsi/Kconfig index d0c31cee4720..e36d6c73c4a4 100644 --- a/drivers/usb/typec/ucsi/Kconfig +++ b/drivers/usb/typec/ucsi/Kconfig @@ -1,7 +1,6 @@ config TYPEC_UCSI tristate "USB Type-C Connector System Software Interface driver" depends on !CPU_BIG_ENDIAN - select TYPEC help USB Type-C Connector System Software Interface (UCSI) is a specification for an interface that allows the operating system to -- cgit v1.2.3 From 446f666da9f019ce2ffd03800995487e79a91462 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 23 Nov 2017 16:39:52 +0100 Subject: USB: usbfs: Filter flags passed in from user space USBDEVFS_URB_ISO_ASAP must be accepted only for ISO endpoints. Improve sanity checking. Reported-by: Andrey Konovalov Signed-off-by: Oliver Neukum Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 705c573d0257..a3fad4ec9870 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1442,14 +1442,18 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb int number_of_packets = 0; unsigned int stream_id = 0; void *buf; - - if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP | - USBDEVFS_URB_SHORT_NOT_OK | + unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK | USBDEVFS_URB_BULK_CONTINUATION | USBDEVFS_URB_NO_FSBR | USBDEVFS_URB_ZERO_PACKET | - USBDEVFS_URB_NO_INTERRUPT)) - return -EINVAL; + USBDEVFS_URB_NO_INTERRUPT; + /* USBDEVFS_URB_ISO_ASAP is a special case */ + if (uurb->type == USBDEVFS_URB_TYPE_ISO) + mask |= USBDEVFS_URB_ISO_ASAP; + + if (uurb->flags & ~mask) + return -EINVAL; + if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX) return -EINVAL; if (uurb->buffer_length > 0 && !uurb->buffer) -- cgit v1.2.3 From 33c309ebc797b908029fd3a0851aefe697e9b598 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 11 Nov 2017 16:31:18 +0100 Subject: USB: ulpi: fix bus-node lookup Fix bus-node lookup during registration, which ended up searching the whole device tree depth-first starting at the parent (or grand parent) rather than just matching on its children. To make things worse, the parent (or grand-parent) node could end being prematurely freed as well. Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT") Reported-by: Peter Robinson Reported-by: Stephen Boyd Cc: stable # 4.10 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 8b351444cc40..9a2ab6751a23 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -180,9 +180,9 @@ static int ulpi_of_register(struct ulpi *ulpi) /* Find a ulpi bus underneath the parent or the grandparent */ parent = ulpi->dev.parent; if (parent->of_node) - np = of_find_node_by_name(parent->of_node, "ulpi"); + np = of_get_child_by_name(parent->of_node, "ulpi"); else if (parent->parent && parent->parent->of_node) - np = of_find_node_by_name(parent->parent->of_node, "ulpi"); + np = of_get_child_by_name(parent->parent->of_node, "ulpi"); if (!np) return 0; -- cgit v1.2.3 From 1d5a31582ef046d3b233f0da1a68ae26519b2f0a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 7 Nov 2017 16:45:04 +0000 Subject: usb: host: fix incorrect updating of offset The variable temp is incorrectly being updated, instead it should be offset otherwise the loop just reads the same capability value and loops forever. Thanks to Alan Stern for pointing out the correct fix to my original fix. Fix also cleans up clang warning: drivers/usb/host/ehci-dbg.c:840:4: warning: Value stored to 'temp' is never read Fixes: d49d43174400 ("USB: misc ehci updates") Cc: stable Signed-off-by: Colin Ian King Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 19f00424f53e..3ed75aaa09d9 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -827,7 +827,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf) default: /* unknown */ break; } - temp = (cap >> 8) & 0xff; + offset = (cap >> 8) & 0xff; } } #endif -- cgit v1.2.3 From 81cf4a45360f70528f1f64ba018d61cb5767249a Mon Sep 17 00:00:00 2001 From: Masakazu Mokuno Date: Fri, 10 Nov 2017 01:25:50 +0900 Subject: USB: core: Add type-specific length check of BOS descriptors As most of BOS descriptors are longer in length than their header 'struct usb_dev_cap_header', comparing solely with it is not sufficient to avoid out-of-bounds access to BOS descriptors. This patch adds descriptor type specific length check in usb_get_bos_descriptor() to fix the issue. Signed-off-by: Masakazu Mokuno Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 28 ++++++++++++++++++++++++---- include/uapi/linux/usb/ch9.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index da8acd980fc6..55b198ba629b 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -905,14 +905,25 @@ void usb_release_bos_descriptor(struct usb_device *dev) } } +static const __u8 bos_desc_len[256] = { + [USB_CAP_TYPE_WIRELESS_USB] = USB_DT_USB_WIRELESS_CAP_SIZE, + [USB_CAP_TYPE_EXT] = USB_DT_USB_EXT_CAP_SIZE, + [USB_SS_CAP_TYPE] = USB_DT_USB_SS_CAP_SIZE, + [USB_SSP_CAP_TYPE] = USB_DT_USB_SSP_CAP_SIZE(1), + [CONTAINER_ID_TYPE] = USB_DT_USB_SS_CONTN_ID_SIZE, + [USB_PTM_CAP_TYPE] = USB_DT_USB_PTM_ID_SIZE, +}; + /* Get BOS descriptor set */ int usb_get_bos_descriptor(struct usb_device *dev) { struct device *ddev = &dev->dev; struct usb_bos_descriptor *bos; struct usb_dev_cap_header *cap; + struct usb_ssp_cap_descriptor *ssp_cap; unsigned char *buffer; - int length, total_len, num, i; + int length, total_len, num, i, ssac; + __u8 cap_type; int ret; bos = kzalloc(sizeof(struct usb_bos_descriptor), GFP_KERNEL); @@ -965,7 +976,13 @@ int usb_get_bos_descriptor(struct usb_device *dev) dev->bos->desc->bNumDeviceCaps = i; break; } + cap_type = cap->bDevCapabilityType; length = cap->bLength; + if (bos_desc_len[cap_type] && length < bos_desc_len[cap_type]) { + dev->bos->desc->bNumDeviceCaps = i; + break; + } + total_len -= length; if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { @@ -973,7 +990,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) continue; } - switch (cap->bDevCapabilityType) { + switch (cap_type) { case USB_CAP_TYPE_WIRELESS_USB: /* Wireless USB cap descriptor is handled by wusb */ break; @@ -986,8 +1003,11 @@ int usb_get_bos_descriptor(struct usb_device *dev) (struct usb_ss_cap_descriptor *)buffer; break; case USB_SSP_CAP_TYPE: - dev->bos->ssp_cap = - (struct usb_ssp_cap_descriptor *)buffer; + ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; + ssac = (le32_to_cpu(ssp_cap->bmAttributes) & + USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1; + if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) + dev->bos->ssp_cap = ssp_cap; break; case CONTAINER_ID_TYPE: dev->bos->ss_id = diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 41a0a81b01e6..c4c79aa331bd 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -880,6 +880,8 @@ struct usb_wireless_cap_descriptor { /* Ultra Wide Band */ __u8 bReserved; } __attribute__((packed)); +#define USB_DT_USB_WIRELESS_CAP_SIZE 11 + /* USB 2.0 Extension descriptor */ #define USB_CAP_TYPE_EXT 2 @@ -1072,6 +1074,7 @@ struct usb_ptm_cap_descriptor { __u8 bDevCapabilityType; } __attribute__((packed)); +#define USB_DT_USB_PTM_ID_SIZE 3 /* * The size of the descriptor for the Sublink Speed Attribute Count * (SSAC) specified in bmAttributes[4:0]. -- cgit v1.2.3 From 973593a960ddac0f14f0d8877d2d0abe0afda795 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Thu, 9 Nov 2017 13:16:46 +0100 Subject: usb: hub: Cycle HUB power when initialization fails Sometimes the USB device gets confused about the state of the initialization and the connection fails. In particular, the device thinks that it's already set up and running while the host thinks the device still needs to be configured. To work around this issue, power-cycle the hub's output to issue a sort of "reset" to the device. This makes the device restart its state machine and then the initialization succeeds. This fixes problems where the kernel reports a list of errors like this: usb 1-1.3: device not accepting address 19, error -71 The end result is a non-functioning device. After this patch, the sequence becomes like this: usb 1-1.3: new high-speed USB device number 18 using ci_hdrc usb 1-1.3: device not accepting address 18, error -71 usb 1-1.3: new high-speed USB device number 19 using ci_hdrc usb 1-1.3: device not accepting address 19, error -71 usb 1-1-port3: attempt power cycle usb 1-1.3: new high-speed USB device number 21 using ci_hdrc usb-storage 1-1.3:1.2: USB Mass Storage device detected Signed-off-by: Mike Looijmans Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 7ccdd3d4db84..cf7bbcb9a63c 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4948,6 +4948,15 @@ loop: usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; + + /* When halfway through our retry count, power-cycle the port */ + if (i == (SET_CONFIG_TRIES / 2) - 1) { + dev_info(&port_dev->dev, "attempt power cycle\n"); + usb_hub_set_port_power(hdev, hub, port1, false); + msleep(2 * hub_power_on_good_delay(hub)); + usb_hub_set_port_power(hdev, hub, port1, true); + msleep(hub_power_on_good_delay(hub)); + } } if (hub->hdev->parent || !hcd->driver->port_handed_over || -- cgit v1.2.3 From c9d24f78268be444e803fb2bb138a2f598de9c23 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Nov 2017 11:00:45 -0800 Subject: usb: build drivers/usb/common/ when USB_SUPPORT is set PHY drivers can use ULPI interfaces when CONFIG_USB (which is host side support) is not enabled, so also build drivers/usb/ when CONFIG_USB_SUPPORT is enabled so that drivers/usb/common/ is built. ERROR: "ulpi_unregister_driver" [drivers/phy/ti/phy-tusb1210.ko] undefined! ERROR: "__ulpi_register_driver" [drivers/phy/ti/phy-tusb1210.ko] undefined! ERROR: "ulpi_read" [drivers/phy/ti/phy-tusb1210.ko] undefined! ERROR: "ulpi_write" [drivers/phy/ti/phy-tusb1210.ko] undefined! ERROR: "ulpi_unregister_driver" [drivers/phy/qualcomm/phy-qcom-usb-hs.ko] undefined! ERROR: "__ulpi_register_driver" [drivers/phy/qualcomm/phy-qcom-usb-hs.ko] undefined! ERROR: "ulpi_write" [drivers/phy/qualcomm/phy-qcom-usb-hs.ko] undefined! Signed-off-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- drivers/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/Makefile b/drivers/Makefile index 1d034b680431..e06f7f633f73 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -105,6 +105,7 @@ obj-$(CONFIG_TC) += tc/ obj-$(CONFIG_UWB) += uwb/ obj-$(CONFIG_USB_PHY) += usb/ obj-$(CONFIG_USB) += usb/ +obj-$(CONFIG_USB_SUPPORT) += usb/ obj-$(CONFIG_PCI) += usb/ obj-$(CONFIG_USB_GADGET) += usb/ obj-$(CONFIG_OF) += usb/ -- cgit v1.2.3 From e43a12f1793ae1fe006e26fe9327a8840a92233c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 14 Nov 2017 01:31:15 -0500 Subject: usb: quirks: Add no-lpm quirk for KY-688 USB 3.1 Type-C Hub KY-688 USB 3.1 Type-C Hub internally uses a Genesys Logic hub to connect to Realtek r8153. Similar to commit ("7496cfe5431f2 usb: quirks: Add no-lpm quirk for Moshi USB to Ethernet Adapter"), no-lpm can make r8153 ethernet work. Signed-off-by: Kai-Heng Feng Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f1dbab6f798f..a10b346b9777 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -146,6 +146,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* appletouch */ { USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ + { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, -- cgit v1.2.3 From 7fee72d5e8f1e7b8d8212e28291b1a0243ecf2f1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Nov 2017 19:27:22 +0100 Subject: uas: Always apply US_FL_NO_ATA_1X quirk to Seagate devices We've been adding this as a quirk on a per device basis hoping that newer disk enclosures would do better, but that has not happened, so simply apply this quirk to all Seagate devices. Signed-off-by: Hans de Goede Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas-detect.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index 1fcd758a961f..3734a25e09e5 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -112,6 +112,10 @@ static int uas_use_uas_driver(struct usb_interface *intf, } } + /* All Seagate disk enclosures have broken ATA pass-through support */ + if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2) + flags |= US_FL_NO_ATA_1X; + usb_stor_adjust_quirks(udev, &flags); if (flags & US_FL_IGNORE_UAS) { -- cgit v1.2.3 From 3bfd1300abfe3adb18e84a89d97a0e82a22124bb Mon Sep 17 00:00:00 2001 From: Matt Wilson Date: Mon, 13 Nov 2017 11:31:31 -0800 Subject: serial: 8250_pci: Add Amazon PCI serial device ID This device will be used in future Amazon EC2 instances as the primary serial port (i.e., data sent to this port will be available via the GetConsoleOuput [1] EC2 API). [1] http://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_GetConsoleOutput.html Cc: stable Signed-off-by: Matt Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index b7e0e3416641..54adf8d56350 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5135,6 +5135,9 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 }, { PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 }, + /* Amazon PCI serial device */ + { PCI_DEVICE(0x1d0f, 0x8250), .driver_data = pbn_b0_1_115200 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL -- cgit v1.2.3 From 0ff3ab701963a845d52337ded7eebf2d1a14fe00 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 22 Nov 2017 09:57:28 +0000 Subject: serial: 8250_early: Only set divisor if valid clk & baud If either uartclk or baud are 0, avoid calculating and setting a divisor based on them since the output will almost certainly be garbage. This also allows platforms such as the MIPS generic kernel, which has no way to know a valid BASE_BASE for the board it is actually booted on at compile time, to set BASE_BAUD to 0 and avoid early_8250 setting a bad divisor. This fixes a regression caused by commit 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure"), which changed the behavior of of_setup_earlycon such that it sets a baud rate in the earlycon structure where previously it was left as 0. All boards supported by the MIPS generic kernel started outputting garbage from the boot console due to an incorrect divisor being set. Fixes: 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure") Cc: stable # 4.14 Signed-off-by: Matt Redfearn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_early.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 362c25ff188a..ae6a256524d8 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -122,12 +122,14 @@ static void __init init_port(struct earlycon_device *device) serial8250_early_out(port, UART_FCR, 0); /* no fifo */ serial8250_early_out(port, UART_MCR, 0x3); /* DTR + RTS */ - divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * device->baud); - c = serial8250_early_in(port, UART_LCR); - serial8250_early_out(port, UART_LCR, c | UART_LCR_DLAB); - serial8250_early_out(port, UART_DLL, divisor & 0xff); - serial8250_early_out(port, UART_DLM, (divisor >> 8) & 0xff); - serial8250_early_out(port, UART_LCR, c & ~UART_LCR_DLAB); + if (port->uartclk && device->baud) { + divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * device->baud); + c = serial8250_early_in(port, UART_LCR); + serial8250_early_out(port, UART_LCR, c | UART_LCR_DLAB); + serial8250_early_out(port, UART_DLL, divisor & 0xff); + serial8250_early_out(port, UART_DLM, (divisor >> 8) & 0xff); + serial8250_early_out(port, UART_LCR, c & ~UART_LCR_DLAB); + } } int __init early_serial8250_setup(struct earlycon_device *device, -- cgit v1.2.3 From 57c77ce47063bd298e31ffcf7875ed0c2633bf8d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:11 +0100 Subject: mlxsw: spectrum_router: Offload decap only for up tunnels When a new local route is added, an IPIP entry is looked up to determine whether the route should be offloaded as a tunnel decap or as a trap. That decision should take into account whether the tunnel netdevice in question is actually IFF_UP, and only install a decap offload if it is. Fixes: 0063587d3587 ("mlxsw: spectrum: Support decap-only IP-in-IP tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 632c7b229054..c529e31fc78b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3996,7 +3996,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, case RTN_LOCAL: ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4, dip); - if (ipip_entry) { + if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry, -- cgit v1.2.3 From cab43d9c877456d2d8feb77335327316d3de9871 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:12 +0100 Subject: mlxsw: spectrum_router: Demote tunnels on VRF migration The mlxsw driver currently doesn't offload GRE tunnels if they have the same local address and use the same underlay VRF. When such a situation arises, the tunnels in conflict are demoted to slow path. However, the current code only verifies this condition on tunnel creation and tunnel change, not when a tunnel is moved to a different VRF. When the tunnel has no bound device, underlay and overlay are the same. Thus moving a tunnel moves the underlay as well, and that can cause local address conflict. So modify mlxsw_sp_netdevice_ipip_ol_vrf_event() to check if there are any conflicting tunnels, and demote them if yes. Fixes: af641713e97d ("mlxsw: spectrum_router: Onload conflicting tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c529e31fc78b..a23e452f7261 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1449,9 +1449,27 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + u32 ul_tb_id; if (!ipip_entry) return 0; + + /* For flat configuration cases, moving overlay to a different VRF might + * cause local address conflict, and the conflicting tunnels need to be + * demoted. + */ + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, true, false, false, extack); } -- cgit v1.2.3 From d97cda5f465bacc82659263a885703d73759ea04 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:13 +0100 Subject: mlxsw: spectrum_router: Handle encap to demoted tunnels Some tunnels that are offloadable on their own can nonetheless be demoted to slow path if their local address is in conflict with that of another tunnel. When a route is formed for such a tunnel, mlxsw_sp_nexthop_ipip_init() fails to find the corresponding IPIP entry, and that triggers a FIB abort. Resolve the problem by not assuming that a tunnel for which mlxsw_sp_ipip_ops.can_offload() holds also automatically has an IPIP entry. Fixes: af641713e97d ("mlxsw: spectrum_router: Onload conflicting tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 61 ++++++++++------------ 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a23e452f7261..17e2153a8dc9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3361,22 +3361,19 @@ static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) return ul_dev ? (ul_dev->flags & IFF_UP) : true; } -static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh, - struct net_device *ol_dev) +static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_ipip_entry *ipip_entry) { bool removing; if (!nh->nh_grp->gateway || nh->ipip_entry) - return 0; - - nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - if (!nh->ipip_entry) - return -ENOENT; + return; - removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev); + nh->ipip_entry = ipip_entry; + removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - return 0; + mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -3421,21 +3418,21 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct fib_nh *fib_nh) { - struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; struct net_device *dev = fib_nh->nh_dev; - enum mlxsw_sp_ipip_type ipipt; + struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif *rif; int err; - if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) && - router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); - if (err) - return err; - mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); - return 0; + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); + if (ipip_entry) { + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + if (ipip_ops->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); + return 0; + } } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; @@ -4712,21 +4709,21 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct rt6_info *rt) { - struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_ipip_entry *ipip_entry; struct net_device *dev = rt->dst.dev; - enum mlxsw_sp_ipip_type ipipt; struct mlxsw_sp_rif *rif; int err; - if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) && - router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV6)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); - if (err) - return err; - mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); - return 0; + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); + if (ipip_entry) { + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + if (ipip_ops->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV6)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); + return 0; + } } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; -- cgit v1.2.3 From 09dbf6297fa8bb4433d9293623b464750d874685 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:14 +0100 Subject: mlxsw: spectrum_router: Update nexthop RIF on update The function mlxsw_sp_nexthop_rif_update() walks the list of nexthops associated with a RIF, and updates the corresponding entries in the switch. It is used in particular when a tunnel underlay netdevice moves to a different VRF, and all the nexthops are migrated over to a new RIF. The problem is that each nexthop holds a reference to its RIF, and that is not updated. So after the old RIF is gone, further activity on these nexthops (such as downing the underlay netdevice) dereferences a dangling pointer. Fix the issue by updating rif of impacted nexthops before calling mlxsw_sp_nexthop_rif_update(). Fixes: 0c5f1cd5ba8c ("mlxsw: spectrum_router: Generalize __mlxsw_sp_ipip_entry_update_tunnel()") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 28 ++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 17e2153a8dc9..72ef4f8025f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1370,8 +1370,9 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif); static int mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1389,17 +1390,18 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(new_lb_rif); ipip_entry->ol_lb = new_lb_rif; - if (keep_encap) { - list_splice_init(&old_lb_rif->common.nexthop_list, - &new_lb_rif->common.nexthop_list); - mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common); - } + if (keep_encap) + mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common, + &new_lb_rif->common); mlxsw_sp_rif_destroy(&old_lb_rif->common); return 0; } +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + /** * Update the offload related to an IPIP entry. This always updates decap, and * in addition to that it also: @@ -3560,6 +3562,18 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif) +{ + struct mlxsw_sp_nexthop *nh; + + list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); + list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) + nh->rif = new_rif; + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); +} + static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { -- cgit v1.2.3 From eb281683621b71ab9710d9dccbbef0c2e1769c97 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2017 15:30:52 +0100 Subject: serdev: ttyport: add missing receive_buf sanity checks The receive_buf tty-port callback should return the number of bytes accepted and must specifically never return a negative errno (or a value larger than the buffer size) to the tty layer. A serdev driver not providing a receive_buf callback would currently cause the flush_to_ldisc() worker to spin in a tight loop when the tty buffer pointers are incremented with -EINVAL (-22) after data has been received. A serdev driver occasionally returning a negative errno (or a too large byte count) could cause information leaks or crashes when accessing memory outside the tty buffers in consecutive callbacks. Fixes: cd6484e1830b ("serdev: Introduce new bus for serial attached devices") Cc: stable # 4.11 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/serdev-ttyport.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index ce7ad0acee7a..09fbdd52a561 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -27,11 +27,22 @@ static int ttyport_receive_buf(struct tty_port *port, const unsigned char *cp, { struct serdev_controller *ctrl = port->client_data; struct serport *serport = serdev_controller_get_drvdata(ctrl); + int ret; if (!test_bit(SERPORT_ACTIVE, &serport->flags)) return 0; - return serdev_controller_receive_buf(ctrl, cp, count); + ret = serdev_controller_receive_buf(ctrl, cp, count); + + dev_WARN_ONCE(&ctrl->dev, ret < 0 || ret > count, + "receive_buf returns %d (count = %zu)\n", + ret, count); + if (ret < 0) + return 0; + else if (ret > count) + return count; + + return ret; } static void ttyport_write_wakeup(struct tty_port *port) -- cgit v1.2.3 From fd00cf81a9a84776ba58e56bd042c726dcf75cf3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2017 15:30:53 +0100 Subject: serdev: fix receive_buf return value when no callback The receive_buf callback is supposed to return the number of bytes processed and should specifically not return a negative errno. Due to missing sanity checks in the serdev tty-port controller, a driver not providing a receive_buf callback could cause the flush_to_ldisc() worker to spin in a tight loop when the tty buffer pointers are incremented with -EINVAL (-22). The missing sanity checks have now been added to the tty-port controller, but let's fix up the serdev-controller helper as well. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/serdev.h b/include/linux/serdev.h index e69402d4a8ae..d609e6dc5bad 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -184,7 +184,7 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, struct serdev_device *serdev = ctrl->serdev; if (!serdev || !serdev->ops->receive_buf) - return -EINVAL; + return 0; return serdev->ops->receive_buf(serdev, data, count); } -- cgit v1.2.3 From 8bcd4e6a8decac251d55c4377e2e67f052777ce0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2017 15:30:55 +0100 Subject: serdev: ttyport: fix NULL-deref on hangup Make sure to use a properly refcounted tty_struct in write_wake up to avoid dereferencing a NULL-pointer when a port is being hung up. Fixes: bed35c6dfa6a ("serdev: add a tty port controller driver") Cc: stable # 4.11 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/serdev-ttyport.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 09fbdd52a561..12cb9139073e 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -49,12 +49,19 @@ static void ttyport_write_wakeup(struct tty_port *port) { struct serdev_controller *ctrl = port->client_data; struct serport *serport = serdev_controller_get_drvdata(ctrl); + struct tty_struct *tty; + + tty = tty_port_tty_get(port); + if (!tty) + return; - if (test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &port->tty->flags) && + if (test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && test_bit(SERPORT_ACTIVE, &serport->flags)) serdev_controller_write_wakeup(ctrl); - wake_up_interruptible_poll(&port->tty->write_wait, POLLOUT); + wake_up_interruptible_poll(&tty->write_wait, POLLOUT); + + tty_kref_put(tty); } static const struct tty_port_client_operations client_ops = { -- cgit v1.2.3 From 90dbad8cd6efccbdce109d5ef0724f8434a6cdde Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2017 15:30:56 +0100 Subject: serdev: ttyport: fix tty locking in close Make sure to hold the tty lock as required when calling tty-driver close() (e.g. to avoid racing with hangup()). Note that the serport active flag is currently set under the lock at controller open, but really isn't protected by it. Fixes: cd6484e1830b ("serdev: Introduce new bus for serial attached devices") Signed-off-by: Johan Hovold Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/serdev-ttyport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 12cb9139073e..247788a16f0b 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -154,8 +154,10 @@ static void ttyport_close(struct serdev_controller *ctrl) clear_bit(SERPORT_ACTIVE, &serport->flags); + tty_lock(tty); if (tty->ops->close) tty->ops->close(tty, NULL); + tty_unlock(tty); tty_release_struct(tty, serport->tty_idx); } -- cgit v1.2.3 From 1a6152d36dee08da2be2a3030dceb45ef680460a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 28 Nov 2017 23:01:44 +0800 Subject: quota: propagate error from __dquot_initialize In commit 6184fc0b8dd7 ("quota: Propagate error from ->acquire_dquot()"), we have propagated error from __dquot_initialize to caller, but we forgot to handle such error in add_dquot_ref(), so, currently, during quota accounting information initialization flow, if we failed for some of inodes, we just ignore such error, and do account for others, which is not a good implementation. In this patch, we choose to let user be aware of such error, so after turning on quota successfully, we can make sure all inodes disk usage can be accounted, which will be more reasonable. Suggested-by: Jan Kara Signed-off-by: Chao Yu Signed-off-by: Jan Kara --- fs/quota/dquot.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39f1b0b0c76f..d38684ff2ebb 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -941,12 +941,13 @@ static int dqinit_needed(struct inode *inode, int type) } /* This routine is guarded by s_umount semaphore */ -static void add_dquot_ref(struct super_block *sb, int type) +static int add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif + int err = 0; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { @@ -966,7 +967,11 @@ static void add_dquot_ref(struct super_block *sb, int type) reserved = 1; #endif iput(old_inode); - __dquot_initialize(inode, type); + err = __dquot_initialize(inode, type); + if (err) { + iput(inode); + goto out; + } /* * We hold a reference to 'inode' so it couldn't have been @@ -981,7 +986,7 @@ static void add_dquot_ref(struct super_block *sb, int type) } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); - +out: #ifdef CONFIG_QUOTA_DEBUG if (reserved) { quota_error(sb, "Writes happened before quota was turned on " @@ -989,6 +994,7 @@ static void add_dquot_ref(struct super_block *sb, int type) "Please run quotacheck(8)"); } #endif + return err; } /* @@ -2379,10 +2385,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); - add_dquot_ref(sb, type); - - return 0; + error = add_dquot_ref(sb, type); + if (error) + dquot_disable(sb, type, flags); + return error; out_file_init: dqopt->files[type] = NULL; iput(inode); -- cgit v1.2.3 From ba2d8d887d962c2f790e6dc01b2fd25b4608720b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:19:48 +0100 Subject: net: mvpp2: fix the txq_init error path When an allocation in the txq_init path fails, the allocated buffers end-up being freed twice: in the txq_init error path, and in txq_deinit. This lead to issues as txq_deinit would work on already freed memory regions: kernel BUG at mm/slub.c:3915! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP This patch fixes this by removing the txq_init own error path, as the txq_deinit function is always called on errors. This was introduced by TSO as way more buffers are allocated. Fixes: 186cd4d4e414 ("net: mvpp2: software tso support") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 6c20e811f973..79f01cd80dd7 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5805,7 +5805,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, sizeof(*txq_pcpu->buffs), GFP_KERNEL); if (!txq_pcpu->buffs) - goto cleanup; + return -ENOMEM; txq_pcpu->count = 0; txq_pcpu->reserved_num = 0; @@ -5821,26 +5821,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port, &txq_pcpu->tso_headers_dma, GFP_KERNEL); if (!txq_pcpu->tso_headers) - goto cleanup; + return -ENOMEM; } return 0; -cleanup: - for_each_present_cpu(cpu) { - txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->buffs); - - dma_free_coherent(port->dev->dev.parent, - txq_pcpu->size * TSO_HEADER_SIZE, - txq_pcpu->tso_headers, - txq_pcpu->tso_headers_dma); - } - - dma_free_coherent(port->dev->dev.parent, - txq->size * MVPP2_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_dma); - - return -ENOMEM; } /* Free allocated TXQ resources */ -- cgit v1.2.3 From 26146b0e6b6869c6cd8a45ab3a4a5562e7a91b23 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:19:49 +0100 Subject: net: mvpp2: cleanup probed ports in the probe error path This patches fixes the probe error path by cleaning up probed ports, to avoid leaving registered net devices when the driver failed to probe. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 79f01cd80dd7..afae4fe00965 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -8329,7 +8329,7 @@ static int mvpp2_probe(struct platform_device *pdev) for_each_available_child_of_node(dn, port_node) { err = mvpp2_port_probe(pdev, port_node, priv, i); if (err < 0) - goto err_mg_clk; + goto err_port_probe; i++; } @@ -8345,12 +8345,19 @@ static int mvpp2_probe(struct platform_device *pdev) priv->stats_queue = create_singlethread_workqueue(priv->queue_name); if (!priv->stats_queue) { err = -ENOMEM; - goto err_mg_clk; + goto err_port_probe; } platform_set_drvdata(pdev, priv); return 0; +err_port_probe: + i = 0; + for_each_available_child_of_node(dn, port_node) { + if (priv->port_list[i]) + mvpp2_port_remove(priv->port_list[i]); + i++; + } err_mg_clk: clk_disable_unprepare(priv->axi_clk); if (priv->hw_version == MVPP22) -- cgit v1.2.3 From e749aca84b10f3987b2ee1f76e0c7d8aacc5653c Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Tue, 28 Nov 2017 14:19:50 +0100 Subject: net: mvpp2: do not disable GMAC padding Short fragmented packets may never be sent by the hardware when padding is disabled. This patch stop modifying the GMAC padding bits, to leave them to their reset value (disabled). Fixes: 3919357fb0bb ("net: mvpp2: initialize the GMAC when using a port") Signed-off-by: Yan Markman [Antoine: commit message] Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index afae4fe00965..5be58b04b95e 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -4629,11 +4629,6 @@ static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port) MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL; writel(val, port->base + MVPP22_GMAC_CTRL_4_REG); - - val = readl(port->base + MVPP2_GMAC_CTRL_2_REG); - val |= MVPP2_GMAC_DISABLE_PADDING; - val &= ~MVPP2_GMAC_FLOW_CTRL_MASK; - writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); } else if (phy_interface_mode_is_rgmii(port->phy_interface)) { val = readl(port->base + MVPP22_GMAC_CTRL_4_REG); val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL | @@ -4641,10 +4636,6 @@ static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port) MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; val &= ~MVPP22_CTRL4_DP_CLK_SEL; writel(val, port->base + MVPP22_GMAC_CTRL_4_REG); - - val = readl(port->base + MVPP2_GMAC_CTRL_2_REG); - val &= ~MVPP2_GMAC_DISABLE_PADDING; - writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); } /* The port is connected to a copper PHY */ -- cgit v1.2.3 From 76e583c5f50ef539caea6935d37af3595034befb Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:19:51 +0100 Subject: net: mvpp2: check ethtool sets the Tx ring size is to a valid min value This patch fixes the Tx ring size checks when using ethtool, by adding an extra check in the PPv2 check_ringparam_valid helper. The Tx ring size cannot be set to a value smaller than the minimum number of descriptors needed for TSO. Fixes: 1d17db08c056 ("net: mvpp2: limit TSO segments and use stop/wake thresholds") Suggested-by: Yan Markman Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 5be58b04b95e..d83a78be98a2 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6842,6 +6842,12 @@ static int mvpp2_check_ringparam_valid(struct net_device *dev, else if (!IS_ALIGNED(ring->tx_pending, 32)) new_tx_pending = ALIGN(ring->tx_pending, 32); + /* The Tx ring size cannot be smaller than the minimum number of + * descriptors needed for TSO. + */ + if (new_tx_pending < MVPP2_MAX_SKB_DESCS) + new_tx_pending = ALIGN(MVPP2_MAX_SKB_DESCS, 32); + if (ring->rx_pending != new_rx_pending) { netdev_info(dev, "illegal Rx ring size value %d, round to %d\n", ring->rx_pending, new_rx_pending); -- cgit v1.2.3 From 952b6b3b07877419386e719ff20917170e1ce684 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:26:30 +0100 Subject: net: phy: marvell10g: fix the PHY id mask The Marvell 10G PHY driver supports different hardware revisions, which have their bits 3..0 differing. To get the correct revision number these bits should be ignored. This patch fixes this by using the already defined MARVELL_PHY_ID_MASK (0xfffffff0) instead of the custom 0xffffffff mask. Fixes: 20b2af32ff3f ("net: phy: add Marvell Alaska X 88X3310 10Gigabit PHY support") Suggested-by: Yan Markman Signed-off-by: Antoine Tenart Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index aebc08beceba..21b3f36e023a 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -16,6 +16,7 @@ * link takes priority and the other port is completely locked out. */ #include +#include enum { MV_PCS_BASE_T = 0x0000, @@ -338,7 +339,7 @@ static int mv3310_read_status(struct phy_device *phydev) static struct phy_driver mv3310_drivers[] = { { .phy_id = 0x002b09aa, - .phy_id_mask = 0xffffffff, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "mv88x3310", .features = SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Full | @@ -360,7 +361,7 @@ static struct phy_driver mv3310_drivers[] = { module_phy_driver(mv3310_drivers); static struct mdio_device_id __maybe_unused mv3310_tbl[] = { - { 0x002b09aa, 0xffffffff }, + { 0x002b09aa, MARVELL_PHY_ID_MASK }, { }, }; MODULE_DEVICE_TABLE(mdio, mv3310_tbl); -- cgit v1.2.3 From c8ec2041f549e7f2dee0c34d25381be6f7805f99 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 28 Nov 2017 15:22:20 +0000 Subject: MIPS: Add custom serial.h with BASE_BAUD override for generic kernel Add a custom serial.h header for MIPS, allowing platforms to override the asm-generic version if required. The generic platform uses this header to set BASE_BAUD to 0. The generic platform supports multiple boards, which may have different UART clocks. Also one of the boards supported is the Boston FPGA board, where the UART clock depends on the loaded FPGA bitfile. As such there is no way that the generic kernel can set a compile time default BASE_BAUD. Commit 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure") changed the behavior of of_setup_earlycon such that any baud rate set in the device tree is now set in the earlycon structure. The UART driver will then calculate a divisor based on BASE_BAUD and set it. With MIPS generic kernels this resulted in garbage output due to the incorrect uart clock rate being used to calculate a divisor. This commit, combined with "serial: 8250_early: Only set divisor if valid clk & baud" prevents the earlycon code setting a bad divisor and restores earlycon output. Fixes: 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure") Cc: stable # 4.14 Signed-off-by: Matt Redfearn Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/Kbuild | 1 - arch/mips/include/asm/serial.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 arch/mips/include/asm/serial.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 7c8aab23bce8..b1f66699677d 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += qrwlock.h generic-y += qspinlock.h generic-y += sections.h generic-y += segment.h -generic-y += serial.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += user.h diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h new file mode 100644 index 000000000000..1d830c6666c2 --- /dev/null +++ b/arch/mips/include/asm/serial.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 MIPS Tech, LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#ifndef __ASM__SERIAL_H +#define __ASM__SERIAL_H + +#ifdef CONFIG_MIPS_GENERIC +/* + * Generic kernels cannot know a correct value for all platforms at + * compile time. Set it to 0 to prevent 8250_early using it + */ +#define BASE_BAUD 0 +#else +#include +#endif + +#endif /* __ASM__SERIAL_H */ -- cgit v1.2.3 From dea521a2b9f96e905fa2bb2f95e23ec00c2ec436 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Nov 2017 20:46:49 +0100 Subject: bnxt_en: Fix an error handling path in 'bnxt_get_module_eeprom()' Error code returned by 'bnxt_read_sfp_module_eeprom_info()' is handled a few lines above when reading the A0 portion of the EEPROM. The same should be done when reading the A2 portion of the EEPROM. In order to correctly propagate an error, update 'rc' in this 2nd call as well, otherwise 0 (success) is returned. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7ce1d4b7e67d..b13ce5ebde8d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2136,8 +2136,8 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, start, - length, data); + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + start, length, data); } return rc; } -- cgit v1.2.3 From f95d5bf03b5ec0d7ea8e552e15abe70c646249b5 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Wed, 22 Nov 2017 16:29:57 +0100 Subject: lmc: Use memdup_user() as a cleanup Fix coccicheck warning which recommends to use memdup_user(): drivers/net/wan/lmc/lmc_main.c:497:27-34: WARNING opportunity for memdup_user Generated by: scripts/coccinelle/memdup_user/memdup_user.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: David S. Miller --- drivers/net/wan/lmc/lmc_main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 37b1e0d03e31..90a4ad9a2d08 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -494,18 +494,11 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ break; } - data = kmalloc(xc.len, GFP_KERNEL); - if (!data) { - ret = -ENOMEM; + data = memdup_user(xc.data, xc.len); + if (IS_ERR(data)) { + ret = PTR_ERR(data); break; } - - if(copy_from_user(data, xc.data, xc.len)) - { - kfree(data); - ret = -ENOMEM; - break; - } printk("%s: Starting load of data Len: %d at 0x%p == 0x%p\n", dev->name, xc.len, xc.data, data); -- cgit v1.2.3 From 08f46070dde2a835a7a5bfd4c70372c27bff5d88 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:16:06 +0800 Subject: sctp: force SCTP_ERROR_INV_STRM with __u32 when calling sctp_chunk_fail This patch is to force SCTP_ERROR_INV_STRM with right type to fit in sctp_chunk_fail to avoid the sparse error. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index a20145b3a949..76ea66be0bbe 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, */ /* Mark as failed send. */ - sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); + sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM); if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; -- cgit v1.2.3 From af2697a0273f7665429c47d71ab26f2412af924e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:16:07 +0800 Subject: sctp: force the params with right types for sctp csum apis Now sctp_csum_xxx doesn't really match the param types of these common csum apis. As sctp_csum_xxx is defined in sctp/checksum.h, many sparse errors occur when make C=2 not only with M=net/sctp but also with other modules that include this header file. This patch is to force them fit in csum apis with the right types. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/checksum.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 4a5b9a306c69..32ee65a30aff 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -48,31 +48,32 @@ static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) /* This uses the crypto implementation of crc32c, which is either * implemented w/ hardware support or resolves to __crc32c_le(). */ - return crc32c(sum, buff, len); + return (__force __wsum)crc32c((__force __u32)sum, buff, len); } static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { - return __crc32c_le_combine(csum, csum2, len); + return (__force __wsum)__crc32c_le_combine((__force __u32)csum, + (__force __u32)csum2, len); } static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = sctp_hdr(skb); - __le32 ret, old = sh->checksum; const struct skb_checksum_ops ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, }; + __le32 old = sh->checksum; + __wsum new; sh->checksum = 0; - ret = cpu_to_le32(~__skb_checksum(skb, offset, skb->len - offset, - ~(__u32)0, &ops)); + new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops); sh->checksum = old; - return ret; + return cpu_to_le32((__force __u32)new); } #endif /* __sctp_checksum_h__ */ -- cgit v1.2.3 From 1ba896f6f52bfafac6dec4ca583cdd9a073858e8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:16:08 +0800 Subject: sctp: remove extern from stream sched Now each stream sched ops is defined in different .c file and added into the global ops in another .c file, it uses extern to make this work. However extern is not good coding style to get them in and even make C=2 reports errors for this. This patch adds sctp_sched_ops_xxx_init for each stream sched ops in their .c file, then get them into the global ops by calling them when initializing sctp module. Fixes: 637784ade221 ("sctp: introduce priority based stream scheduler") Fixes: ac1ed8b82cd6 ("sctp: introduce round robin stream scheduler") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 5 +++++ include/net/sctp/stream_sched.h | 5 +++++ net/sctp/protocol.c | 1 + net/sctp/stream_sched.c | 25 ++++++++++++++++++------- net/sctp/stream_sched_prio.c | 7 ++++++- net/sctp/stream_sched_rr.c | 7 ++++++- 6 files changed, 41 insertions(+), 9 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 749a42882437..906a9c0efa71 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -194,6 +194,11 @@ void sctp_remaddr_proc_exit(struct net *net); */ int sctp_offload_init(void); +/* + * sctp/stream_sched.c + */ +void sctp_sched_ops_init(void); + /* * sctp/stream.c */ diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index c676550a4c7d..5c5da48f65e7 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -69,4 +69,9 @@ void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); +void sctp_sched_ops_register(enum sctp_sched_type sched, + struct sctp_sched_ops *sched_ops); +void sctp_sched_ops_prio_init(void); +void sctp_sched_ops_rr_init(void); + #endif /* __sctp_stream_sched_h__ */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f5172c21349b..6a38c2503649 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1499,6 +1499,7 @@ static __init int sctp_init(void) INIT_LIST_HEAD(&sctp_address_families); sctp_v4_pf_init(); sctp_v6_pf_init(); + sctp_sched_ops_init(); status = register_pernet_subsys(&sctp_defaults_ops); if (status) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 0b83ec51e43b..d8c162a4089c 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .unsched_all = sctp_sched_fcfs_unsched_all, }; +static void sctp_sched_ops_fcfs_init(void) +{ + sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs); +} + /* API to other parts of the stack */ -extern struct sctp_sched_ops sctp_sched_prio; -extern struct sctp_sched_ops sctp_sched_rr; +static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; -static struct sctp_sched_ops *sctp_sched_ops[] = { - &sctp_sched_fcfs, - &sctp_sched_prio, - &sctp_sched_rr, -}; +void sctp_sched_ops_register(enum sctp_sched_type sched, + struct sctp_sched_ops *sched_ops) +{ + sctp_sched_ops[sched] = sched_ops; +} + +void sctp_sched_ops_init(void) +{ + sctp_sched_ops_fcfs_init(); + sctp_sched_ops_prio_init(); + sctp_sched_ops_rr_init(); +} int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 384dbf3c8760..7997d35dd0fd 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) sctp_sched_prio_unsched(soute); } -struct sctp_sched_ops sctp_sched_prio = { +static struct sctp_sched_ops sctp_sched_prio = { .set = sctp_sched_prio_set, .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, @@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = { .sched_all = sctp_sched_prio_sched_all, .unsched_all = sctp_sched_prio_unsched_all, }; + +void sctp_sched_ops_prio_init(void) +{ + sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio); +} diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index 7612a438c5b9..1155692448f1 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) sctp_sched_rr_unsched(stream, soute); } -struct sctp_sched_ops sctp_sched_rr = { +static struct sctp_sched_ops sctp_sched_rr = { .set = sctp_sched_rr_set, .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, @@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = { .sched_all = sctp_sched_rr_sched_all, .unsched_all = sctp_sched_rr_unsched_all, }; + +void sctp_sched_ops_rr_init(void) +{ + sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr); +} -- cgit v1.2.3 From 57f015f5eccf25fd4a3336fe3cbbee920a8fba6f Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Tue, 28 Nov 2017 10:44:29 -0500 Subject: packet: fix crash in fanout_demux_rollover() syzkaller found a race condition fanout_demux_rollover() while removing a packet socket from a fanout group. po->rollover is read and operated on during packet_rcv_fanout(), via fanout_demux_rollover(), but the pointer is currently cleared before the synchronization in packet_release(). It is safer to delay the cleanup until after synchronize_net() has been called, ensuring all calls to packet_rcv_fanout() for this socket have finished. To further simplify synchronization around the rollover structure, set po->rollover in fanout_add() only if there are no errors. This removes the need for rcu in the struct and in the call to packet_getsockopt(..., PACKET_ROLLOVER_STATS, ...). Crashing stack trace: fanout_demux_rollover+0xb6/0x4d0 net/packet/af_packet.c:1392 packet_rcv_fanout+0x649/0x7c8 net/packet/af_packet.c:1487 dev_queue_xmit_nit+0x835/0xc10 net/core/dev.c:1953 xmit_one net/core/dev.c:2975 [inline] dev_hard_start_xmit+0x16b/0xac0 net/core/dev.c:2995 __dev_queue_xmit+0x17a4/0x2050 net/core/dev.c:3476 dev_queue_xmit+0x17/0x20 net/core/dev.c:3509 neigh_connected_output+0x489/0x720 net/core/neighbour.c:1379 neigh_output include/net/neighbour.h:482 [inline] ip6_finish_output2+0xad1/0x22a0 net/ipv6/ip6_output.c:120 ip6_finish_output+0x2f9/0x920 net/ipv6/ip6_output.c:146 NF_HOOK_COND include/linux/netfilter.h:239 [inline] ip6_output+0x1f4/0x850 net/ipv6/ip6_output.c:163 dst_output include/net/dst.h:459 [inline] NF_HOOK.constprop.35+0xff/0x630 include/linux/netfilter.h:250 mld_sendpack+0x6a8/0xcc0 net/ipv6/mcast.c:1660 mld_send_initial_cr.part.24+0x103/0x150 net/ipv6/mcast.c:2072 mld_send_initial_cr net/ipv6/mcast.c:2056 [inline] ipv6_mc_dad_complete+0x99/0x130 net/ipv6/mcast.c:2079 addrconf_dad_completed+0x595/0x970 net/ipv6/addrconf.c:4039 addrconf_dad_work+0xac9/0x1160 net/ipv6/addrconf.c:3971 process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113 worker_thread+0x223/0x1990 kernel/workqueue.c:2247 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432 Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state") Fixes: 509c7a1ecc860 ("packet: avoid panic in packet_getsockopt()") Reported-by: syzbot Signed-off-by: Mike Maloney Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 32 ++++++++++---------------------- net/packet/internal.h | 1 - 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 737092ca9b4e..1b7bb9d9865e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1687,7 +1687,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) atomic_long_set(&rollover->num, 0); atomic_long_set(&rollover->num_huge, 0); atomic_long_set(&rollover->num_failed, 0); - po->rollover = rollover; } if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { @@ -1745,6 +1744,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); po->fanout = match; + po->rollover = rollover; + rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); __fanout_link(sk, po); err = 0; @@ -1758,10 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } out: - if (err && rollover) { - kfree_rcu(rollover, rcu); - po->rollover = NULL; - } + kfree(rollover); mutex_unlock(&fanout_mutex); return err; } @@ -1785,11 +1783,6 @@ static struct packet_fanout *fanout_release(struct sock *sk) list_del(&f->list); else f = NULL; - - if (po->rollover) { - kfree_rcu(po->rollover, rcu); - po->rollover = NULL; - } } mutex_unlock(&fanout_mutex); @@ -3029,6 +3022,7 @@ static int packet_release(struct socket *sock) synchronize_net(); if (f) { + kfree(po->rollover); fanout_release_data(f); kfree(f); } @@ -3843,7 +3837,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; - struct packet_rollover *rollover; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3922,18 +3915,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_ROLLOVER_STATS: - rcu_read_lock(); - rollover = rcu_dereference(po->rollover); - if (rollover) { - rstats.tp_all = atomic_long_read(&rollover->num); - rstats.tp_huge = atomic_long_read(&rollover->num_huge); - rstats.tp_failed = atomic_long_read(&rollover->num_failed); - data = &rstats; - lv = sizeof(rstats); - } - rcu_read_unlock(); - if (!rollover) + if (!po->rollover) return -EINVAL; + rstats.tp_all = atomic_long_read(&po->rollover->num); + rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); + rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; diff --git a/net/packet/internal.h b/net/packet/internal.h index 562fbc155006..a1d2b2319ae9 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -95,7 +95,6 @@ struct packet_fanout { struct packet_rollover { int sock; - struct rcu_head rcu; atomic_long_t num; atomic_long_t num_huge; atomic_long_t num_failed; -- cgit v1.2.3 From 15fe076edea787807a7cdc168df832544b58eba6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Nov 2017 08:03:30 -0800 Subject: net/packet: fix a race in packet_bind() and packet_notifier() syzbot reported crashes [1] and provided a C repro easing bug hunting. When/if packet_do_bind() calls __unregister_prot_hook() and releases po->bind_lock, another thread can run packet_notifier() and process an NETDEV_UP event. This calls register_prot_hook() and hooks again the socket right before first thread is able to grab again po->bind_lock. Fixes this issue by temporarily setting po->num to 0, as suggested by David Miller. [1] dev_remove_pack: ffff8801bf16fa80 not found ------------[ cut here ]------------ kernel BUG at net/core/dev.c:7945! ( BUG_ON(!list_empty(&dev->ptype_all)); ) invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: device syz0 entered promiscuous mode CPU: 0 PID: 3161 Comm: syzkaller404108 Not tainted 4.14.0+ #190 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801cc57a500 task.stack: ffff8801cc588000 RIP: 0010:netdev_run_todo+0x772/0xae0 net/core/dev.c:7945 RSP: 0018:ffff8801cc58f598 EFLAGS: 00010293 RAX: ffff8801cc57a500 RBX: dffffc0000000000 RCX: ffffffff841f75b2 RDX: 0000000000000000 RSI: 1ffff100398b1ede RDI: ffff8801bf1f8810 device syz0 entered promiscuous mode RBP: ffff8801cc58f898 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801bf1f8cd8 R13: ffff8801cc58f870 R14: ffff8801bf1f8780 R15: ffff8801cc58f7f0 FS: 0000000001716880(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020b13000 CR3: 0000000005e25000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:106 tun_detach drivers/net/tun.c:670 [inline] tun_chr_close+0x49/0x60 drivers/net/tun.c:2845 __fput+0x333/0x7f0 fs/file_table.c:210 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x199/0x270 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x9bb/0x1ae0 kernel/exit.c:865 do_group_exit+0x149/0x400 kernel/exit.c:968 SYSC_exit_group kernel/exit.c:979 [inline] SyS_exit_group+0x1d/0x20 kernel/exit.c:977 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x44ad19 Fixes: 30f7ea1c2b5f ("packet: race condition in packet_bind") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Francesco Ruggeri Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1b7bb9d9865e..da215e5c1399 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3091,6 +3091,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { if (po->running) { rcu_read_unlock(); + /* prevents packet_notifier() from calling + * register_prot_hook() + */ + po->num = 0; __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@ -3099,6 +3103,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } + BUG_ON(po->running); po->num = proto; po->prot_hook.type = proto; -- cgit v1.2.3 From ea37d5998b50a72b9045ba60a132eeb20e1c4230 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Nov 2017 01:54:00 +0000 Subject: Btrfs: incremental send, fix wrong unlink path after renaming file Under some circumstances, an incremental send operation can issue wrong paths for unlink commands related to files that have multiple hard links and some (or all) of those links were renamed between the parent and send snapshots. Consider the following example: Parent snapshot . (ino 256) |---- a/ (ino 257) | |---- b/ (ino 259) | | |---- c/ (ino 260) | | |---- f2 (ino 261) | | | |---- f2l1 (ino 261) | |---- d/ (ino 262) |---- f1l1_2 (ino 258) |---- f2l2 (ino 261) |---- f1_2 (ino 258) Send snapshot . (ino 256) |---- a/ (ino 257) | |---- f2l1/ (ino 263) | |---- b2/ (ino 259) | |---- c/ (ino 260) | | |---- d3 (ino 262) | | |---- f1l1_2 (ino 258) | | |---- f2l2_2 (ino 261) | | |---- f1_2 (ino 258) | | | |---- f2 (ino 261) | |---- f1l2 (ino 258) | |---- d (ino 261) When computing the incremental send stream the following steps happen: 1) When processing inode 261, a rename operation is issued that renames inode 262, which currently as a path of "d", to an orphan name of "o262-7-0". This is done because in the send snapshot, inode 261 has of its hard links with a path of "d" as well. 2) Two link operations are issued that create the new hard links for inode 261, whose names are "d" and "f2l2_2", at paths "/" and "o262-7-0/" respectively. 3) Still while processing inode 261, unlink operations are issued to remove the old hard links of inode 261, with names "f2l1" and "f2l2", at paths "a/" and "d/". However path "d/" does not correspond anymore to the directory inode 262 but corresponds instead to a hard link of inode 261 (link command issued in the previous step). This makes the receiver fail with a ENOTDIR error when attempting the unlink operation. The problem happens because before sending the unlink operation, we failed to detect that inode 262 was one of ancestors for inode 261 in the parent snapshot, and therefore we didn't recompute the path for inode 262 before issuing the unlink operation for the link named "f2l2" of inode 262. The detection failed because the function "is_ancestor()" only follows the first hard link it finds for an inode instead of all of its hard links (as it was originally created for being used with directories only, for which only one hard link exists). So fix this by making "is_ancestor()" follow all hard links of the input inode. A test case for fstests follows soon. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c10e4c70f02d..20d3300bd268 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3521,7 +3521,40 @@ out: } /* - * Check if ino ino1 is an ancestor of inode ino2 in the given root. + * Check if inode ino2, or any of its ancestors, is inode ino1. + * Return 1 if true, 0 if false and < 0 on error. + */ +static int check_ino_in_path(struct btrfs_root *root, + const u64 ino1, + const u64 ino1_gen, + const u64 ino2, + const u64 ino2_gen, + struct fs_path *fs_path) +{ + u64 ino = ino2; + + if (ino1 == ino2) + return ino1_gen == ino2_gen; + + while (ino > BTRFS_FIRST_FREE_OBJECTID) { + u64 parent; + u64 parent_gen; + int ret; + + fs_path_reset(fs_path); + ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); + if (ret < 0) + return ret; + if (parent == ino1) + return parent_gen == ino1_gen; + ino = parent; + } + return 0; +} + +/* + * Check if ino ino1 is an ancestor of inode ino2 in the given root for any + * possible path (in case ino2 is not a directory and has multiple hard links). * Return 1 if true, 0 if false and < 0 on error. */ static int is_ancestor(struct btrfs_root *root, @@ -3530,36 +3563,91 @@ static int is_ancestor(struct btrfs_root *root, const u64 ino2, struct fs_path *fs_path) { - u64 ino = ino2; - bool free_path = false; + bool free_fs_path = false; int ret = 0; + struct btrfs_path *path = NULL; + struct btrfs_key key; if (!fs_path) { fs_path = fs_path_alloc(); if (!fs_path) return -ENOMEM; - free_path = true; + free_fs_path = true; } - while (ino > BTRFS_FIRST_FREE_OBJECTID) { - u64 parent; - u64 parent_gen; + path = alloc_path_for_send(); + if (!path) { + ret = -ENOMEM; + goto out; + } - fs_path_reset(fs_path); - ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); - if (ret < 0) { - if (ret == -ENOENT && ino == ino2) - ret = 0; - goto out; + key.objectid = ino2; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + while (true) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + u32 cur_offset = 0; + u32 item_size; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + continue; } - if (parent == ino1) { - ret = parent_gen == ino1_gen ? 1 : 0; - goto out; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != ino2) + break; + if (key.type != BTRFS_INODE_REF_KEY && + key.type != BTRFS_INODE_EXTREF_KEY) + break; + + item_size = btrfs_item_size_nr(leaf, slot); + while (cur_offset < item_size) { + u64 parent; + u64 parent_gen; + + if (key.type == BTRFS_INODE_EXTREF_KEY) { + unsigned long ptr; + struct btrfs_inode_extref *extref; + + ptr = btrfs_item_ptr_offset(leaf, slot); + extref = (struct btrfs_inode_extref *) + (ptr + cur_offset); + parent = btrfs_inode_extref_parent(leaf, + extref); + cur_offset += sizeof(*extref); + cur_offset += btrfs_inode_extref_name_len(leaf, + extref); + } else { + parent = key.offset; + cur_offset = item_size; + } + + ret = get_inode_info(root, parent, NULL, &parent_gen, + NULL, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = check_ino_in_path(root, ino1, ino1_gen, + parent, parent_gen, fs_path); + if (ret) + goto out; } - ino = parent; + path->slots[0]++; } + ret = 0; out: - if (free_path) + btrfs_free_path(path); + if (free_fs_path) fs_path_free(fs_path); return ret; } -- cgit v1.2.3 From 9d0ca444d0b317d31acf6f8fc18ac6f478518924 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Nov 2017 17:20:53 +0100 Subject: s390/gs: add compat regset for the guarded storage broadcast control block git commit e525f8a6e696210d15f8b8277d4da12fc4add299 "s390/gs: add regset for the guarded storage broadcast control block" added the missing regset to the s390_regsets array but failed to add it to the s390_compat_regsets array. Fixes: e525f8a6e696 ("add compat regset for the guarded storage broadcast control block") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 26c0523c1488..cd3df5514552 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1650,6 +1650,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_gs_cb_get, .set = s390_gs_cb_set, }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, { .core_note_type = NT_S390_RI_CB, .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), -- cgit v1.2.3 From eb1bd249ba016284ed762d87c1989dd822500773 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 28 Nov 2017 18:28:44 +0200 Subject: nvme-rdma: fix memory leak during queue allocation In case nvme_rdma_wait_for_cm timeout expires before we get an established or rejected event (rdma_connect succeeded) from rdma_cm, we end up with leaking the ib transport resources for dedicated queue. This scenario can easily reproduced using traffic test during port toggling. Also, in order to protect from parallel ib queue destruction, that may be invoked from different context's, introduce new flag that stands for transport readiness. While we're here, protect also against a situation that we can receive rdma_cm events during ib queue destruction. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 02ef0771f6b9..37af56596be6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,6 +77,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_ALLOCATED = 0, NVME_RDMA_Q_LIVE = 1, + NVME_RDMA_Q_TR_READY = 2, }; struct nvme_rdma_queue { @@ -390,12 +391,23 @@ out_err: static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + + if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags)) + return; + + dev = queue->device; + ibdev = dev->dev; ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); - rdma_destroy_qp(queue->cm_id); + /* + * The cm_id object might have been destroyed during RDMA connection + * establishment error flow to avoid getting other cma events, thus + * the destruction of the QP shouldn't use rdma_cm API. + */ + ib_destroy_qp(queue->qp); ib_free_cq(queue->ib_cq); nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, @@ -463,6 +475,8 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_ring; } + set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); + return 0; out_destroy_ring: @@ -529,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, out_destroy_cm_id: rdma_destroy_id(queue->cm_id); + nvme_rdma_destroy_queue_ib(queue); return ret; } -- cgit v1.2.3 From 7e5dd57ef3081ff6c03908d786ed5087f6fbb7ae Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 25 Nov 2017 03:03:00 +0900 Subject: nvme-pci: fix NULL pointer dereference in nvme_free_host_mem() Following condition which will cause NULL pointer dereference will occur in nvme_free_host_mem() when it tries to remove pci device via nvme_remove() especially after a failure of host memory allocation for HMB. "(host_mem_descs == NULL) && (nr_host_mem_descs != 0)" It's because __nr_host_mem_descs__ is not cleared to 0 unlike __host_mem_descs__ is so. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 617374762b7c..f5800c3c9082 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1759,6 +1759,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev) dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; + dev->nr_host_mem_descs = 0; } static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, -- cgit v1.2.3 From d210a9874b8f6166579408131cb74495caff1958 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 23 Nov 2017 17:13:40 +0100 Subject: xfs: fortify xfs_alloc_buftarg error handling percpu_counter_init failure path doesn't clean up &btp->bt_lru list. Call list_lru_destroy in that error path. Similarly register_shrinker error path is not handled. While it is unlikely to trigger these error path, it is not impossible especially the later might fail with large NUMAs. Let's handle the failure to make the code more robust. Noticed-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4db6e8d780f6..4c6e86d861fd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1815,22 +1815,27 @@ xfs_alloc_buftarg( btp->bt_daxdev = dax_dev; if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } -- cgit v1.2.3 From d41c6172bd4031979eab722c265a2e5764383c3c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:32 -0800 Subject: xfs: fix leaks on corruption errors in xfs_bmap.c Use _GOTO instead of _RETURN so we can free the allocated cursor on error. Fixes: bf80628 ("xfs: remove xfs_bmse_shift_one") Fixes-coverity-id: 1423813, 1423676 Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 08df809e2315..1210f684d3c2 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents( *done = true; goto del_cursor; } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); new_startoff = got.br_startoff - offset_shift_fsb; if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { @@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents( goto del_cursor; } } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); if (stop_fsb >= got.br_startoff + got.br_blockcount) { error = -EIO; -- cgit v1.2.3 From eda6bc27ccc852d34393739009486932f3ba70ae Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:32 -0800 Subject: xfs: fix uninitialized variable in xfs_scrub_quota On the first pass through the while(1) loop, we get to xfs_scrub_should_terminate() which can test the uninitialized error variable. Fixes-coverity-id: 1423737 Fixes: c2fc338c ("xfs: scrub quota information") Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 8e58ba842946..613def9692a1 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -207,7 +207,7 @@ xfs_scrub_quota( xfs_dqid_t id = 0; uint dqtype; int nimaps; - int error; + int error = 0; if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return -ENOENT; -- cgit v1.2.3 From 712d361d59efa6349a9538f4fd9a49073f0e8127 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:33 -0800 Subject: xfs: calculate correct offset in xfs_scrub_quota_item It's only used for tracepoints so it's relatively harmless, but the offset is calculated incorrectly in xfs_scrub_quota_item. qi_dqperchunk is the nr. of dquots per "chunk" which we have conveniently *cough* defined to always be 1 FSB. Therefore block_offset * qi_dqperchunk == first id in that chunk, and so offset = id / qi_dqperchunk id * dqperchunk is ... meaningless. Fixes-coverity-id: 1423965 Fixes: c2fc338c ("xfs: scrub quota information") Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 613def9692a1..3d9037eceaf1 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -107,7 +107,7 @@ xfs_scrub_quota_item( unsigned long long rcount; xfs_ino_t fs_icount; - offset = id * qi->qi_dqperchunk; + offset = id / qi->qi_dqperchunk; /* * We fed $id and DQNEXT into the xfs_qm_dqget call, which means -- cgit v1.2.3 From 3c02a6d946657e1ae0688e0d89f2dd2cfe9afba8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Nov 2017 10:59:40 +0100 Subject: Revert "ALSA: usb-audio: Fix potential zero-division at parsing FU" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 8428a8ebde2d ("ALSA: usb-audio: Fix potential zero-division at parsing FU") is utterly bogus and breaks the case with csize=1 instead of fixing anything. Just take it back again. Reported-by: Jörg Otte Fixes: 8428a8ebde2d ("ALSA: usb-audio: Fix potential zero-division at parsing FU" Signed-off-by: Takashi Iwai Signed-off-by: Linus Torvalds --- sound/usb/mixer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 0537c6322990..61b348383de8 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1476,9 +1476,9 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, return -EINVAL; } csize = hdr->bControlSize; - if (csize <= 1) { + if (!csize) { usb_audio_dbg(state->chip, - "unit %u: invalid bControlSize <= 1\n", + "unit %u: invalid bControlSize == 0\n", unitid); return -EINVAL; } -- cgit v1.2.3 From f81a348728ec5ac43f3bbcf81c97d52baba253f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 21 Nov 2017 11:59:13 +0000 Subject: arm64: mm: cleanup stale AIVIVT references Since commit: 155433cb365ee466 ("arm64: cache: Remove support for ASID-tagged VIVT I-caches") ... the kernel no longer cares about AIVIVT I-caches, as these were removed from the architecture. This patch removes the stale references to such I-caches. The comment in flush_context() is also updated to clarify when and where the TLB invalidation occurs. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/mm/context.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 76d1cc85d5b1..955130762a3c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -38,7 +38,7 @@ * * See Documentation/cachetlb.txt for more information. Please note that * the implementation assumes non-aliasing VIPT D-cache and (aliasing) - * VIPT or ASID-tagged VIVT I-cache. + * VIPT I-cache. * * flush_cache_mm(mm) * diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7..28a45a19aae7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -117,7 +117,10 @@ static void flush_context(unsigned int cpu) per_cpu(reserved_asids, i) = asid; } - /* Queue a TLB invalidate and flush the I-cache if necessary. */ + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ cpumask_setall(&tlb_flush_pending); } -- cgit v1.2.3 From 250dcd11466e06df64b92520e2c56bdae453581b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 27 Nov 2017 11:28:50 +0100 Subject: mmc: sdhci: Avoid swiotlb buffer being full The commit de3ee99b097d ("mmc: Delete bounce buffer handling") deletes the bounce buffer handling, but also causes the max_req_size for sdhci to be increased, in case when max_segs == 1. This causes errors for sdhci-pci Ricoh variant, about the swiotlb buffer to become full. Fix the issue, by taking IO_TLB_SEGSIZE and IO_TLB_SHIFT into account when deciding the max_req_size for sdhci. Reported-by: Jiri Slaby Fixes: de3ee99b097d ("mmc: Delete bounce buffer handling") Cc: # v4.14+ Signed-off-by: Ulf Hansson Tested-by: Jiri Slaby Acked-by: Adrian Hunter --- drivers/mmc/host/sdhci.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 2f14334e42df..e9290a3439d5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -3650,23 +3651,30 @@ int sdhci_setup_host(struct sdhci_host *host) spin_lock_init(&host->lock); + /* + * Maximum number of sectors in one transfer. Limited by SDMA boundary + * size (512KiB). Note some tuning modes impose a 4MiB limit, but this + * is less anyway. + */ + mmc->max_req_size = 524288; + /* * Maximum number of segments. Depends on if the hardware * can do scatter/gather or not. */ - if (host->flags & SDHCI_USE_ADMA) + if (host->flags & SDHCI_USE_ADMA) { mmc->max_segs = SDHCI_MAX_SEGS; - else if (host->flags & SDHCI_USE_SDMA) + } else if (host->flags & SDHCI_USE_SDMA) { mmc->max_segs = 1; - else /* PIO */ + if (swiotlb_max_segment()) { + unsigned int max_req_size = (1 << IO_TLB_SHIFT) * + IO_TLB_SEGSIZE; + mmc->max_req_size = min(mmc->max_req_size, + max_req_size); + } + } else { /* PIO */ mmc->max_segs = SDHCI_MAX_SEGS; - - /* - * Maximum number of sectors in one transfer. Limited by SDMA boundary - * size (512KiB). Note some tuning modes impose a 4MiB limit, but this - * is less anyway. - */ - mmc->max_req_size = 524288; + } /* * Maximum segment size. Could be one segment with the maximum number -- cgit v1.2.3 From 25415cec502a1232b19fffc85465882b19a90415 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Nov 2017 11:11:41 -0800 Subject: cls_bpf: don't decrement net's refcount when offload fails When cls_bpf offload was added it seemed like a good idea to call cls_bpf_delete_prog() instead of extending the error handling path, since the software state is fully initialized at that point. This handling of errors without jumping to the end of the function is error prone, as proven by later commit missing that extra call to __cls_bpf_delete_prog(). __cls_bpf_delete_prog() is now expected to be invoked with a reference on exts->net or the field zeroed out. The call on the offload's error patch does not fullfil this requirement, leading to each error stealing a reference on net namespace. Create a function undoing what cls_bpf_set_parms() did and use it from __cls_bpf_delete_prog() and the error path. Fixes: aae2c35ec892 ("cls_bpf: use tcf_exts_get_net() before call_rcu()") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Daniel Borkmann Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a9f3e317055c..6fe798c2df1a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -258,11 +258,8 @@ static int cls_bpf_init(struct tcf_proto *tp) return 0; } -static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) +static void cls_bpf_free_parms(struct cls_bpf_prog *prog) { - tcf_exts_destroy(&prog->exts); - tcf_exts_put_net(&prog->exts); - if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); else @@ -270,6 +267,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog->bpf_name); kfree(prog->bpf_ops); +} + +static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) +{ + tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); + + cls_bpf_free_parms(prog); kfree(prog); } @@ -514,12 +519,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout_idr; ret = cls_bpf_offload(tp, prog, oldprog); - if (ret) { - if (!oldprog) - idr_remove_ext(&head->handle_idr, prog->handle); - __cls_bpf_delete_prog(prog); - return ret; - } + if (ret) + goto errout_parms; if (!tc_in_hw(prog->gen_flags)) prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; @@ -537,6 +538,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = prog; return 0; +errout_parms: + cls_bpf_free_parms(prog); errout_idr: if (!oldprog) idr_remove_ext(&head->handle_idr, prog->handle); -- cgit v1.2.3 From f85729d07cd649bf69820f14bdad36326a24a699 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Nov 2017 14:28:39 +0100 Subject: sch_sfq: fix null pointer dereference at timer expiration While converting sch_sfq to use timer_setup(), the commit cdeabbb88134 ("net: sched: Convert timers to use timer_setup()") forgot to initialize the 'sch' field. As a result, the timer callback tries to dereference a NULL pointer, and the kernel does oops. Fix it initializing such field at qdisc creation time. Fixes: cdeabbb88134 ("net: sched: Convert timers to use timer_setup()") Signed-off-by: Paolo Abeni Acked-by: Cong Wang Acked-by: Kees Cook Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 890f4a4564e7..09c1203c1711 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -724,6 +724,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; int err; + q->sch = sch; timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); err = tcf_block_get(&q->block, &q->filter_list, sch); -- cgit v1.2.3 From a8dd397903a6e57157f6265911f7d35681364427 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 20:56:07 +0800 Subject: sctp: use right member as the param of list_for_each_entry Commit d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues when migrating a sock") made a mistake that using 'list' as the param of list_for_each_entry to traverse the retransmit, sacked and abandoned queues, while chunks are using 'transmitted_list' to link into these queues. It could cause NULL dereference panic if there are chunks in any of these queues when peeling off one asoc. So use the chunk member 'transmitted_list' instead in this patch. Fixes: d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues when migrating a sock") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3204a9b29407..014847e25648 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -188,13 +188,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, list_for_each_entry(chunk, &t->transmitted, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->retransmit, list) + list_for_each_entry(chunk, &q->retransmit, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->sacked, list) + list_for_each_entry(chunk, &q->sacked, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->abandoned, list) + list_for_each_entry(chunk, &q->abandoned, transmitted_list) cb(chunk); list_for_each_entry(chunk, &q->out_chunk_list, list) -- cgit v1.2.3 From 5eb3d22a8a05af401257e781ddeefc412bfb8542 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 21:12:09 +0800 Subject: bonding: use nla_get_u64 to extract the value for IFLA_BOND_AD_ACTOR_SYSTEM bond_opt_initval expects a u64 type param, it's better to use nla_get_u64 to extract the value here, to eliminate a sparse endianness mismatch warning. Fixes: 171a42c38c6e ("bonding: add netlink support for sys prio, actor sys mac, and port key") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index a1b33aa6054a..9697977b80f0 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -423,7 +423,7 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], return -EINVAL; bond_opt_initval(&newval, - nla_get_be64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); + nla_get_u64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval); if (err) return err; -- cgit v1.2.3 From fc39c38bdc46c49e1e9166afbeb686634e63cbaf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Nov 2017 21:19:05 +0800 Subject: vxlan: use __be32 type for the param vni in __vxlan_fdb_delete All callers of __vxlan_fdb_delete pass vni with __be32 type, and this param should be declared as __be32 type. Fixes: 3ad7a4b141eb ("vxlan: support fdb and learning in COLLECT_METADATA mode") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7ac487031b4b..19b9cc51079e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -874,8 +874,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, const unsigned char *addr, union vxlan_addr ip, - __be16 port, __be32 src_vni, u32 vni, u32 ifindex, - u16 vid) + __be16 port, __be32 src_vni, __be32 vni, + u32 ifindex, u16 vid) { struct vxlan_fdb *f; struct vxlan_rdst *rd = NULL; -- cgit v1.2.3 From c95c3fe5c744b05647240aaba4a163be36b7f123 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:06:10 +0000 Subject: ambassador: fix incorrect indentation of assignment statement Remove one extraneous level of indentation on assignment statement. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/ambassador.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index acf16c323e38..bfc514015b0b 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -2258,7 +2258,7 @@ static int amb_probe(struct pci_dev *pci_dev, PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p", dev->atm_dev->number, dev, dev->atm_dev); - dev->atm_dev->dev_data = (void *) dev; + dev->atm_dev->dev_data = (void *) dev; // register our address amb_esi (dev, dev->atm_dev->esi); -- cgit v1.2.3 From 22dac9f1fdd576c1d03ba00f4a9db9a864a43a70 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:24:15 +0000 Subject: atm: fore200e: use %pK to format kernel addresses instead of %x Don't use %x and casting to print out a kernel address, instead use the %pK and remove the casting. Cleans up smatch warning: drivers/atm/fore200e.c:3093 fore200e_proc_read() warn: argument 3 to %08x specifier is cast from pointer Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/fore200e.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 126855e6cb7d..6ebc4e4820fc 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -3083,8 +3083,8 @@ fore200e_proc_read(struct atm_dev *dev, loff_t* pos, char* page) ASSERT(fore200e_vcc); len = sprintf(page, - " %08x %03d %05d %1d %09lu %05d/%05d %09lu %05d/%05d\n", - (u32)(unsigned long)vcc, + " %pK %03d %05d %1d %09lu %05d/%05d %09lu %05d/%05d\n", + vcc, vcc->vpi, vcc->vci, fore200e_atm2fore_aal(vcc->qos.aal), fore200e_vcc->tx_pdu, fore200e_vcc->tx_min_pdu > 0xFFFF ? 0 : fore200e_vcc->tx_min_pdu, -- cgit v1.2.3 From 4a5def7f6a758aef1a0a3b10e981881c1e914f69 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 27 Nov 2017 05:29:32 -0800 Subject: VSOCK: Don't set sk_state to TCP_CLOSE before testing it A recent commit (3b4477d2dcf2) converted the sk_state to use TCP constants. In that change, vmci_transport_handle_detach was changed such that sk->sk_state was set to TCP_CLOSE before we test whether it is TCP_SYN_SENT. This change moves the sk_state change back to the original locations in that function. Signed-off-by: Jorgen Hansen Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 56573dc85709..a7a73ffe675b 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -804,8 +804,6 @@ static void vmci_transport_handle_detach(struct sock *sk) */ if (vsk->local_addr.svm_cid == VMADDR_CID_HOST || vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = TCP_CLOSE; - if (sk->sk_state == TCP_SYN_SENT) { /* The peer may detach from a queue pair while * we are still in the connecting state, i.e., @@ -815,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk) * event like a reset. */ + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); return; } + sk->sk_state = TCP_CLOSE; } sk->sk_state_change(sk); } -- cgit v1.2.3 From 6c9065427072b4e726a2c17f9aa2448d87a92097 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:39:32 +0000 Subject: atm: lanai: use %p to format kernel addresses instead of %x Don't use %x and casting to print out a kernel address, instead use %p and remove the casting. Cleans up smatch warnings: drivers/atm/lanai.c:1589 service_buffer_allocate() warn: argument 2 to %08lX specifier is cast from pointer drivers/atm/lanai.c:2221 lanai_dev_open() warn: argument 4 to %lx specifier is cast from pointer Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/lanai.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 2351dad78ff5..7e4c2ea3e3f2 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1586,8 +1586,8 @@ static int service_buffer_allocate(struct lanai_dev *lanai) lanai->pci); if (unlikely(lanai->service.start == NULL)) return -ENOMEM; - DPRINTK("allocated service buffer at 0x%08lX, size %zu(%d)\n", - (unsigned long) lanai->service.start, + DPRINTK("allocated service buffer at %p, size %zu(%d)\n", + lanai->service.start, lanai_buf_size(&lanai->service), lanai_buf_size_cardorder(&lanai->service)); /* Clear ServWrite register to be safe */ @@ -2220,9 +2220,9 @@ static int lanai_dev_open(struct atm_dev *atmdev) #endif memcpy(atmdev->esi, eeprom_mac(lanai), ESI_LEN); lanai_timed_poll_start(lanai); - printk(KERN_NOTICE DEV_LABEL "(itf %d): rev.%d, base=0x%lx, irq=%u " + printk(KERN_NOTICE DEV_LABEL "(itf %d): rev.%d, base=%p, irq=%u " "(%pMF)\n", lanai->number, (int) lanai->pci->revision, - (unsigned long) lanai->base, lanai->pci->irq, atmdev->esi); + lanai->base, lanai->pci->irq, atmdev->esi); printk(KERN_NOTICE DEV_LABEL "(itf %d): LANAI%s, serialno=%u(0x%X), " "board_rev=%d\n", lanai->number, lanai->type==lanai2 ? "2" : "HB", (unsigned int) lanai->serialno, -- cgit v1.2.3 From 0195a21079c077abfb475a56830b06b37424982a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 13:47:22 +0000 Subject: atm: suni: remove extraneous space to fix indentation Remove a leading space, fixes indentation Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/suni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c index b8825f2d79e0..4b044710a8cf 100644 --- a/drivers/atm/suni.c +++ b/drivers/atm/suni.c @@ -177,7 +177,7 @@ static int set_loopback(struct atm_dev *dev,int mode) default: return -EINVAL; } - dev->ops->phy_put(dev, control, reg); + dev->ops->phy_put(dev, control, reg); PRIV(dev)->loop_mode = mode; return 0; } -- cgit v1.2.3 From d51aae68b142f48232257e96ce317db25445418d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 27 Nov 2017 18:37:21 +0100 Subject: net: sched: cbq: create block for q->link.block q->link.block is not initialized, that leads to EINVAL when one tries to add filter there. So initialize it properly. This can be reproduced by: $ tc qdisc add dev eth0 root handle 1: cbq avpkt 1000 rate 1000Mbit bandwidth 1000Mbit $ tc filter add dev eth0 parent 1: protocol ip prio 100 u32 match ip protocol 0 0x00 flowid 1:1 Reported-by: Jaroslav Aster Reported-by: Ivan Vecera Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Signed-off-by: Jiri Pirko Acked-by: Eelco Chaudron Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6361be7881f1..525eb3a6d625 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1158,9 +1158,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) return -EINVAL; + err = tcf_block_get(&q->link.block, &q->link.filter_list, sch); + if (err) + goto put_rtab; + err = qdisc_class_hash_init(&q->clhash); if (err < 0) - goto put_rtab; + goto put_block; q->link.sibling = &q->link; q->link.common.classid = sch->handle; @@ -1194,6 +1198,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) cbq_addprio(q, &q->link); return 0; +put_block: + tcf_block_put(q->link.block); + put_rtab: qdisc_put_rtab(q->link.R_tab); return err; -- cgit v1.2.3 From 4650d02ad2d9b2c1c7aa36055166db6aee68f72e Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 14 Nov 2017 11:35:37 -0800 Subject: RISC-V: Remove unused arguments from ATOMIC_OP Our atomics are generated from a complicated series of preprocessor macros, each of which is slightly different from the last. When writing the macros I'd accidentally left some unused arguments floating around. This patch removes the unused macro arguments. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 94 ++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index e2e37c57cbeb..40c73dd59c15 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i) * have the AQ or RL bits set. These don't return anything, so there's only * one version to worry about. */ -#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." #asm_type " zero, %1, %0" \ - : "+A" (v->counter) \ - : "r" (I) \ - : "memory"); \ +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + __asm__ __volatile__ ( \ + "amo" #asm_op "." #asm_type " zero, %1, %0" \ + : "+A" (v->counter) \ + : "r" (I) \ + : "memory"); \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) \ + ATOMIC_OP (op, asm_op, I, d, long, 64) #endif -ATOMIC_OPS(add, add, +, i) -ATOMIC_OPS(sub, add, +, -i) -ATOMIC_OPS(and, and, &, i) -ATOMIC_OPS( or, or, |, i) -ATOMIC_OPS(xor, xor, ^, i) +ATOMIC_OPS(add, add, i) +ATOMIC_OPS(sub, add, -i) +ATOMIC_OPS(and, and, i) +ATOMIC_OPS( or, or, i) +ATOMIC_OPS(xor, xor, i) #undef ATOMIC_OP #undef ATOMIC_OPS @@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i) * There's two flavors of these: the arithmatic ops have both fetch and return * versions, while the logical ops only have fetch versions. */ -#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ { \ register c_type ret; \ @@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) #endif @@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl, ) #undef ATOMIC_OPS #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64) #endif -ATOMIC_OPS(and, and, &, i, , _relaxed) -ATOMIC_OPS(and, and, &, i, .aq , _acquire) -ATOMIC_OPS(and, and, &, i, .rl , _release) -ATOMIC_OPS(and, and, &, i, .aqrl, ) +ATOMIC_OPS(and, and, i, , _relaxed) +ATOMIC_OPS(and, and, i, .aq , _acquire) +ATOMIC_OPS(and, and, i, .rl , _release) +ATOMIC_OPS(and, and, i, .aqrl, ) -ATOMIC_OPS( or, or, |, i, , _relaxed) -ATOMIC_OPS( or, or, |, i, .aq , _acquire) -ATOMIC_OPS( or, or, |, i, .rl , _release) -ATOMIC_OPS( or, or, |, i, .aqrl, ) +ATOMIC_OPS( or, or, i, , _relaxed) +ATOMIC_OPS( or, or, i, .aq , _acquire) +ATOMIC_OPS( or, or, i, .rl , _release) +ATOMIC_OPS( or, or, i, .aqrl, ) -ATOMIC_OPS(xor, xor, ^, i, , _relaxed) -ATOMIC_OPS(xor, xor, ^, i, .aq , _acquire) -ATOMIC_OPS(xor, xor, ^, i, .rl , _release) -ATOMIC_OPS(xor, xor, ^, i, .aqrl, ) +ATOMIC_OPS(xor, xor, i, , _relaxed) +ATOMIC_OPS(xor, xor, i, .aq , _acquire) +ATOMIC_OPS(xor, xor, i, .rl , _release) +ATOMIC_OPS(xor, xor, i, .aqrl, ) #undef ATOMIC_OPS @@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0) #undef ATOMIC_OP #undef ATOMIC_OPS -#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ { \ atomic##prefix##_##func_op(I, v); \ } -#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ { \ return atomic##prefix##_fetch_##func_op(I, v); \ @@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, long, 64) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64) \ + ATOMIC_OP (op, asm_op, I, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) #endif -- cgit v1.2.3 From 8286d51a6c244738aeb071fcd7d2e36a3374e150 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:02:50 -0800 Subject: RISC-V: Comment on why {,cmp}xchg is ordered how it is This is another memory model FIXME. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index 40c73dd59c15..e65d1cd89e28 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v) /* * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a barrier. We just - * use the other implementations directly. + * {cmp,}xchg and the operations that return, so they need a barrier. + */ +/* + * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by + * assigning the same barrier to both the LR and SC operations, but that might + * not make any sense. We're waiting on a memory model specification to + * determine exactly what the right thing to do is here. */ #define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ -- cgit v1.2.3 From 61a60d35b7d1b0b3a31bc21d15805a3654f60920 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:03:48 -0800 Subject: RISC-V: Remove __smp_bp__{before,after}_atomic These duplicate the asm-generic definitions are therefor aren't useful. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 183534b7c39b..455ee16127fb 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,21 +38,6 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) -/* - * These fences exist to enforce ordering around the relaxed AMOs. The - * documentation defines that - * " - * atomic_fetch_add(); - * is equivalent to: - * smp_mb__before_atomic(); - * atomic_fetch_add_relaxed(); - * smp_mb__after_atomic(); - * " - * So we emit full fences on both sides. - */ -#define __smb_mb__before_atomic() smp_mb() -#define __smb_mb__after_atomic() smp_mb() - /* * These barriers prevent accesses performed outside a spinlock from being moved * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only -- cgit v1.2.3 From 3343eb6806f365b9e3d451040671fa9336e57513 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:03:55 -0800 Subject: RISC-V: Remove smb_mb__{before,after}_spinlock() These are obselete. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 455ee16127fb..773c4e039cd7 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,14 +38,6 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) -/* - * These barriers prevent accesses performed outside a spinlock from being moved - * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only - * enforce release consistency, we need full fences here. - */ -#define smb_mb__before_spinlock() smp_mb() -#define smb_mb__after_spinlock() smp_mb() - #include #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 9347ce54cd699db92d37e66191aa4b9a0a92304e Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:04:05 -0800 Subject: RISC-V: __test_and_op_bit_ord should be strongly ordered I mis-read the documentation. After looking at it again the documentation is actually as clear as it can be, it's just that I didn't actually read it in order and therefor did the wrong thing. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 7c281ef1d583..f30daf26f08f 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -67,7 +67,7 @@ : "memory"); #define __test_and_op_bit(op, mod, nr, addr) \ - __test_and_op_bit_ord(op, mod, nr, addr, ) + __test_and_op_bit_ord(op, mod, nr, addr, .aqrl) #define __op_bit(op, mod, nr, addr) \ __op_bit_ord(op, mod, nr, addr, ) -- cgit v1.2.3 From 21db403660d1433b8a02b26d5d4084921b857c40 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:05:04 -0800 Subject: RISC-V: Add READ_ONCE in arch_spin_is_locked() This was just incorrect in the original version. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index 04c71d938afd..a6a005c4f2fb 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -24,7 +24,7 @@ /* FIXME: Replace this with a ticket lock, like MIPS. */ -#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0) static inline void arch_spin_unlock(arch_spinlock_t *lock) { -- cgit v1.2.3 From c901e45a999a1935d7adf653e1cf12dfbcd737aa Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:06:17 -0800 Subject: RISC-V: `sfence.vma` orderes the instruction cache This is just a comment change, but it's one that bit me on the mailing list. It turns out that issuing a `sfence.vma` enforces instruction cache ordering in addition to TLB ordering. This isn't explicitly called out in the ISA manual, but Andrew will be making that more clear in a future revision. CC: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 5ee4ae370b5e..c79fab3d377d 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,7 +17,10 @@ #ifdef CONFIG_MMU -/* Flush entire local TLB */ +/* + * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction + * cache as well, so a 'fence.i' is not necessary. + */ static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); -- cgit v1.2.3 From bf730552734372e45b10fe056726de1950fdfdde Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:06:31 -0800 Subject: RISC-V: remove spin_unlock_wait() This was removed from the other architectures in commit 952111d7db02 ("arch: Remove spin_unlock_wait() arch-specific definitions"). That landed between when we got upstream and when our patches were reviewed, so this is a followup patch. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/spinlock.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index a6a005c4f2fb..2fd27e8ef1fd 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) } } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_rmb(); - do { - cpu_relax(); - } while (arch_spin_is_locked(lock)); - smp_acquire__after_ctrl_dep(); -} - /***********************************************************/ static inline void arch_read_lock(arch_rwlock_t *lock) -- cgit v1.2.3 From b43aaee69d4327d05e7624d9471c17d015b4d67d Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 21 Nov 2017 09:08:07 -0500 Subject: drm/amdgpu: move UVD/VCE and VCN structure out from union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the enablement of VCN Dec and Enc from user space, User space queries kernel for the IP information, if HW has UVD/VCE, the info comes from these IP blocks, but this could end up mis-interpret for VCN when they are in the union, the other way same when HW with VCN block. Signed-off-by: Leo Liu Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Fixes: 95d0906f8506 ("drm/amdgpu: add initial vcn support and decode tests") Cc: stable@vger.kernel.org Reviewed-and-Tested-by: Michel Dänzer --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5afaf6016b4a..c25cedff4915 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1572,18 +1572,14 @@ struct amdgpu_device { /* sdma */ struct amdgpu_sdma sdma; - union { - struct { - /* uvd */ - struct amdgpu_uvd uvd; - - /* vce */ - struct amdgpu_vce vce; - }; - - /* vcn */ - struct amdgpu_vcn vcn; - }; + /* uvd */ + struct amdgpu_uvd uvd; + + /* vce */ + struct amdgpu_vce vce; + + /* vcn */ + struct amdgpu_vcn vcn; /* firmwares */ struct amdgpu_firmware firmware; -- cgit v1.2.3 From ed162fe7643023b52cc21998a6b3eff15a233c5e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Nov 2017 15:41:30 -0500 Subject: drm/amdgpu/gfx7: cache raster_config values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We did this for gfx6 and 8, but somehow missed gfx7. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5c8a7a48a4ad..419ba0ce7ee5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1819,6 +1819,22 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.backend_enable_mask, num_rb_pipes); } + + /* cache the values for userspace */ + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + adev->gfx.config.rb_config[i][j].rb_backend_disable = + RREG32(mmCC_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].user_rb_backend_disable = + RREG32(mmGC_USER_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].raster_config = + RREG32(mmPA_SC_RASTER_CONFIG); + adev->gfx.config.rb_config[i][j].raster_config_1 = + RREG32(mmPA_SC_RASTER_CONFIG_1); + } + } + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } -- cgit v1.2.3 From aca31681b1a594dd1a25a51ff2c58f4f4a165446 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Nov 2017 13:53:29 -0500 Subject: drm/amdgpu: used cached gca values for cik_read_register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the cached values has less latency for bare metal and prevents reading back bogus values if the engine is powergated. This was implemented for VI and SI, but somehow CIK got missed. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik.c | 111 +++++++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 793b1470284d..a296f7bbe57c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1023,22 +1023,101 @@ static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {mmPA_SC_RASTER_CONFIG_1, true}, }; -static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, - u32 reg_offset) + +static uint32_t cik_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) { - uint32_t val; + if (indexed) { + uint32_t val; + unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num; + unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num; + + switch (reg_offset) { + case mmCC_RB_BACKEND_DISABLE: + return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable; + case mmGC_USER_RB_BACKEND_DISABLE: + return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable; + case mmPA_SC_RASTER_CONFIG: + return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config; + case mmPA_SC_RASTER_CONFIG_1: + return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1; + } - mutex_lock(&adev->grbm_idx_mutex); - if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); - val = RREG32(reg_offset); + val = RREG32(reg_offset); - if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - return val; + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + return val; + } else { + unsigned idx; + + switch (reg_offset) { + case mmGB_ADDR_CONFIG: + return adev->gfx.config.gb_addr_config; + case mmMC_ARB_RAMCFG: + return adev->gfx.config.mc_arb_ramcfg; + case mmGB_TILE_MODE0: + case mmGB_TILE_MODE1: + case mmGB_TILE_MODE2: + case mmGB_TILE_MODE3: + case mmGB_TILE_MODE4: + case mmGB_TILE_MODE5: + case mmGB_TILE_MODE6: + case mmGB_TILE_MODE7: + case mmGB_TILE_MODE8: + case mmGB_TILE_MODE9: + case mmGB_TILE_MODE10: + case mmGB_TILE_MODE11: + case mmGB_TILE_MODE12: + case mmGB_TILE_MODE13: + case mmGB_TILE_MODE14: + case mmGB_TILE_MODE15: + case mmGB_TILE_MODE16: + case mmGB_TILE_MODE17: + case mmGB_TILE_MODE18: + case mmGB_TILE_MODE19: + case mmGB_TILE_MODE20: + case mmGB_TILE_MODE21: + case mmGB_TILE_MODE22: + case mmGB_TILE_MODE23: + case mmGB_TILE_MODE24: + case mmGB_TILE_MODE25: + case mmGB_TILE_MODE26: + case mmGB_TILE_MODE27: + case mmGB_TILE_MODE28: + case mmGB_TILE_MODE29: + case mmGB_TILE_MODE30: + case mmGB_TILE_MODE31: + idx = (reg_offset - mmGB_TILE_MODE0); + return adev->gfx.config.tile_mode_array[idx]; + case mmGB_MACROTILE_MODE0: + case mmGB_MACROTILE_MODE1: + case mmGB_MACROTILE_MODE2: + case mmGB_MACROTILE_MODE3: + case mmGB_MACROTILE_MODE4: + case mmGB_MACROTILE_MODE5: + case mmGB_MACROTILE_MODE6: + case mmGB_MACROTILE_MODE7: + case mmGB_MACROTILE_MODE8: + case mmGB_MACROTILE_MODE9: + case mmGB_MACROTILE_MODE10: + case mmGB_MACROTILE_MODE11: + case mmGB_MACROTILE_MODE12: + case mmGB_MACROTILE_MODE13: + case mmGB_MACROTILE_MODE14: + case mmGB_MACROTILE_MODE15: + idx = (reg_offset - mmGB_MACROTILE_MODE0); + return adev->gfx.config.macrotile_mode_array[idx]; + default: + return RREG32(reg_offset); + } + } } static int cik_read_register(struct amdgpu_device *adev, u32 se_num, @@ -1048,13 +1127,13 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(cik_allowed_read_registers); i++) { + bool indexed = cik_allowed_read_registers[i].grbm_indexed; + if (reg_offset != cik_allowed_read_registers[i].reg_offset) continue; - *value = cik_allowed_read_registers[i].grbm_indexed ? - cik_read_indexed_register(adev, se_num, - sh_num, reg_offset) : - RREG32(reg_offset); + *value = cik_get_register_value(adev, indexed, se_num, sh_num, + reg_offset); return 0; } return -EINVAL; -- cgit v1.2.3 From b693fc1f83bc9aa5e86c87ac7da48870e45bf486 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Nov 2017 17:46:50 -0500 Subject: Revert "drm/amdgpu: fix rmmod KCQ disable failed error" This reverts commit 446947b44fb8cabc0213ff4efd706931e36b1963. this patch is incorrrect, amdgpu_ucode_bo_fini always called after gfx_hw_fini. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2c574374d9b6..3573ecdb06ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1837,9 +1837,6 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) - amdgpu_ucode_fini_bo(adev); - for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 033fba2def6f..5f5aa5fddc16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -164,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_fini_bo(adev); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7714f4a6c8b0..447d446b5015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -442,6 +442,8 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + amdgpu_ucode_fini_bo(adev); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); -- cgit v1.2.3 From 89ce6e0afee8eafa679093207dabd717af9d09c5 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 22 Nov 2017 15:55:21 +0100 Subject: drm/amdgpu: Set adev->vcn.irq.num_types for VCN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were setting adev->uvd.irq.num_types instead. Fixes: 9b257116e784 ("drm/amdgpu: add vcn enc irq support") Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 1eb4d79d6e30..0450ac5ba6b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1175,7 +1175,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 1; adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; } -- cgit v1.2.3 From fa7c7939b4bf112cd06ba166b739244077898990 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 22 Nov 2017 15:55:21 +0100 Subject: drm/amdgpu: Use unsigned ring indices in amdgpu_queue_mgr_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This matches the corresponding UAPI fields. Treating the ring index as signed could result in accessing random unrelated memory if the MSB was set. Fixes: effd924d2f3b ("drm/amdgpu: untie user ring ids from kernel ring ids v6") Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c25cedff4915..0b14b5373783 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -717,7 +717,7 @@ int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr); int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr, - int hw_ip, int instance, int ring, + u32 hw_ip, u32 instance, u32 ring, struct amdgpu_ring **out_ring); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 190e28cb827e..93d86619e802 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -63,7 +63,7 @@ static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, static int amdgpu_identity_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - int ring, + u32 ring, struct amdgpu_ring **out_ring) { switch (mapper->hw_ip) { @@ -121,7 +121,7 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) static int amdgpu_lru_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - int user_ring, bool lru_pipe_order, + u32 user_ring, bool lru_pipe_order, struct amdgpu_ring **out_ring) { int r, i, j; @@ -208,7 +208,7 @@ int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, */ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr, - int hw_ip, int instance, int ring, + u32 hw_ip, u32 instance, u32 ring, struct amdgpu_ring **out_ring) { int r, ip_num_rings; -- cgit v1.2.3 From 6edc6910ba4cd6eab309263539c8f09b8ad772bf Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 24 Nov 2017 11:39:30 +0100 Subject: drm/amdgpu: don't try to move pinned BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Never try to move pinned BOs during CS. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a57cec737c18..57abf7abd7a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -409,6 +409,10 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, if (candidate->robj == validated) break; + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* Check if this BO is in one of the domains we need space for */ -- cgit v1.2.3 From acc34503bd22bd826bbf7b7f95c84a06bb6380e4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2017 14:50:01 -0400 Subject: drm/amdgpu: drop experimental flag for raven Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ec96bb1f9eaf..c2f414ffb2cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -536,7 +536,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, /* Raven */ - {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0, 0, 0} }; -- cgit v1.2.3 From 1b6c80674192bd6b235de7bc58f0bd03eb7f89df Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 13 Oct 2017 12:13:02 -0400 Subject: drm/amd/display: Add null check for 24BPP (xfm and dpp) Fixes Nullptr error when trying 24BPP Signed-off-by: Bhawanpreet Lakha Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d1cdf9f8853d..a2f9be3716cf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -856,6 +856,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; + /* Taps calculations */ if (pipe_ctx->plane_res.xfm != NULL) res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( @@ -864,16 +865,21 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) if (pipe_ctx->plane_res.dpp != NULL) res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); - if (!res) { /* Try 24 bpp linebuffer */ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_24BPP; - res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( - pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); - - res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( - pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); + if (pipe_ctx->plane_res.xfm != NULL) + res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( + pipe_ctx->plane_res.xfm, + &pipe_ctx->plane_res.scl_data, + &plane_state->scaling_quality); + + if (pipe_ctx->plane_res.dpp != NULL) + res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( + pipe_ctx->plane_res.dpp, + &pipe_ctx->plane_res.scl_data, + &plane_state->scaling_quality); } if (res) -- cgit v1.2.3 From 827f11e97dbc591f0c9619151b9a89f082e8ecb8 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Tue, 17 Oct 2017 17:18:24 -0400 Subject: drm/amd/display: Should disable when new stream is null core_link_disable_stream should be called when the new stream is null (i.e. want to disable). Modify the if condition to reflect that. Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 1229a3315018..be14a2665072 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1868,8 +1868,10 @@ static void dce110_reset_hw_ctx_wrap( pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { struct clock_source *old_clk = pipe_ctx_old->clock_source; - /* disable already, no need to disable again */ - if (pipe_ctx->stream && !pipe_ctx->stream->dpms_off) + /* Disable if new stream is null. O/w, if stream is + * disabled already, no need to disable again. + */ + if (!pipe_ctx->stream || !pipe_ctx->stream->dpms_off) core_link_disable_stream(pipe_ctx_old, FREE_ACQUIRED_RESOURCE); pipe_ctx_old->stream_res.tg->funcs->set_blank(pipe_ctx_old->stream_res.tg, true); -- cgit v1.2.3 From 6bffebc90c23e2341a1f8371e7b496ec94136e47 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 18 Oct 2017 20:22:40 -0400 Subject: drm/amd/display: Add timing validation against dongle cap For DP active dongles, the dpcd dongle caps are read but not used to validate mode timing. This addresses this. In particular, this change fixes light up on the HDMI 4k TV connected through DP active dongle. Since the 4k TV defaults to YCbCr420, which the dongle don't support. This change does not address MST cases, a more generalized approach must be taken for that. Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 + drivers/gpu/drm/amd/display/dc/core/dc_link.c | 70 +++++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 21 ++++++- drivers/gpu/drm/amd/display/dc/inc/core_status.h | 2 +- 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index fe63f5894d43..15625fd94455 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -128,6 +128,7 @@ static bool create_links( link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); + link->link_status.dpcd_caps = &link->dpcd_caps; enc_init.ctx = dc->ctx; enc_init.channel = CHANNEL_ID_UNKNOWN; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 0602610489d7..73077869151c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1801,12 +1801,75 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal, link); } +bool dp_active_dongle_validate_timing( + const struct dc_crtc_timing *timing, + const struct dc_dongle_caps *dongle_caps) +{ + unsigned int required_pix_clk = timing->pix_clk_khz; + + if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER || + dongle_caps->extendedCapValid == false) + return true; + + /* Check Pixel Encoding */ + switch (timing->pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + break; + case PIXEL_ENCODING_YCBCR422: + if (!dongle_caps->is_dp_hdmi_ycbcr422_pass_through) + return false; + break; + case PIXEL_ENCODING_YCBCR420: + if (!dongle_caps->is_dp_hdmi_ycbcr420_pass_through) + return false; + break; + default: + /* Invalid Pixel Encoding*/ + return false; + } + + + /* Check Color Depth and Pixel Clock */ + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + required_pix_clk /= 2; + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + case COLOR_DEPTH_888: + /*888 and 666 should always be supported*/ + break; + case COLOR_DEPTH_101010: + if (dongle_caps->dp_hdmi_max_bpc < 10) + return false; + required_pix_clk = required_pix_clk * 10 / 8; + break; + case COLOR_DEPTH_121212: + if (dongle_caps->dp_hdmi_max_bpc < 12) + return false; + required_pix_clk = required_pix_clk * 12 / 8; + break; + + case COLOR_DEPTH_141414: + case COLOR_DEPTH_161616: + default: + /* These color depths are currently not supported */ + return false; + } + + if (required_pix_clk > dongle_caps->dp_hdmi_max_pixel_clk) + return false; + + return true; +} + enum dc_status dc_link_validate_mode_timing( const struct dc_stream_state *stream, struct dc_link *link, const struct dc_crtc_timing *timing) { uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk; + struct dc_dongle_caps *dongle_caps = &link->link_status.dpcd_caps->dongle_caps; /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle @@ -1814,8 +1877,13 @@ enum dc_status dc_link_validate_mode_timing( if (link->remote_sinks[0]) return DC_OK; + /* Passive Dongle */ if (0 != max_pix_clk && timing->pix_clk_khz > max_pix_clk) - return DC_EXCEED_DONGLE_MAX_CLK; + return DC_EXCEED_DONGLE_CAP; + + /* Active Dongle*/ + if (!dp_active_dongle_validate_timing(timing, dongle_caps)) + return DC_EXCEED_DONGLE_CAP; switch (stream->signal) { case SIGNAL_TYPE_EDP: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index ced42484dcfc..8e97b42a03a2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2062,6 +2062,24 @@ bool is_dp_active_dongle(const struct dc_link *link) (dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER); } +static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc) +{ + switch (bpc) { + case DOWN_STREAM_MAX_8BPC: + return 8; + case DOWN_STREAM_MAX_10BPC: + return 10; + case DOWN_STREAM_MAX_12BPC: + return 12; + case DOWN_STREAM_MAX_16BPC: + return 16; + default: + break; + } + + return -1; +} + static void get_active_converter_info( uint8_t data, struct dc_link *link) { @@ -2131,7 +2149,8 @@ static void get_active_converter_info( hdmi_caps.bits.YCrCr420_CONVERSION; link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc = - hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT; + translate_dpcd_max_bpc( + hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); link->dpcd_caps.dongle_caps.extendedCapValid = true; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index 01df85641684..94fc31080fda 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -38,7 +38,7 @@ enum dc_status { DC_FAIL_DETACH_SURFACES = 8, DC_FAIL_SURFACE_VALIDATE = 9, DC_NO_DP_LINK_BANDWIDTH = 10, - DC_EXCEED_DONGLE_MAX_CLK = 11, + DC_EXCEED_DONGLE_CAP = 11, DC_SURFACE_PIXEL_FORMAT_UNSUPPORTED = 12, DC_FAIL_BANDWIDTH_VALIDATE = 13, /* BW and Watermark validation */ DC_FAIL_SCALING = 14, -- cgit v1.2.3 From 3eb4eba42263cc1e810d79b4970cbcb096c210ab Mon Sep 17 00:00:00 2001 From: Roman Li Date: Fri, 20 Oct 2017 10:15:18 -0400 Subject: drm/amd/display: Fix S3 topology change Clean fake sink flag on resume if real sink connected. Fixing S3 topology change problem like this: 1) x desktop with 1 or > displays 2) unplug display 3) suspend 4) replug same display 5) resume without this change replugged display doesn't light up Signed-off-by: Roman Li Reviewed-by: Sun peng Li Acked-by: Harry Wentland Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 889ed24084e8..009aaeb263e3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -677,6 +677,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) mutex_lock(&aconnector->hpd_lock); dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + + if (aconnector->fake_enable && aconnector->dc_link->local_sink) + aconnector->fake_enable = false; + aconnector->dc_sink = NULL; amdgpu_dm_update_connector_after_detect(aconnector); mutex_unlock(&aconnector->hpd_lock); -- cgit v1.2.3 From 4ddd76d1ce00bb0d78e73e29fd9062ecd6bad611 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 23 Oct 2017 15:37:37 -0400 Subject: drm/amd/display: fix split recout calculation Recout split rounding code was wrong Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a2f9be3716cf..ced339a145c6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -580,14 +580,12 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { - pipe_ctx->plane_res.scl_data.recout.height /= 2; - pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; /* Floor primary pipe, ceil 2ndary pipe */ - pipe_ctx->plane_res.scl_data.recout.height += pipe_ctx->plane_res.scl_data.recout.height % 2; + pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; + pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; } else { - pipe_ctx->plane_res.scl_data.recout.width /= 2; + pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width; - pipe_ctx->plane_res.scl_data.recout.width += pipe_ctx->plane_res.scl_data.recout.width % 2; } } else if (pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) { -- cgit v1.2.3 From 58fe8990fc29336c9ea6531d9ecc44466d3b9221 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Mon, 23 Oct 2017 09:11:46 -0400 Subject: drm/amd/display: Handle as MST first and then DP dongle if sink support both Signed-off-by: Hersen Wu Reviewed-by: Tony Cheng Reviewed-by: Wenjing Liu Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 32 +++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 73077869151c..e27ed4a45265 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -480,22 +480,6 @@ static void detect_dp( sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; detect_dp_sink_caps(link); - /* DP active dongles */ - if (is_dp_active_dongle(link)) { - link->type = dc_connection_active_dongle; - if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) { - /* - * active dongle unplug processing for short irq - */ - link_disconnect_sink(link); - return; - } - - if (link->dpcd_caps.dongle_type != - DISPLAY_DONGLE_DP_HDMI_CONVERTER) { - *converter_disable_audio = true; - } - } if (is_mst_supported(link)) { sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT_MST; link->type = dc_connection_mst_branch; @@ -535,6 +519,22 @@ static void detect_dp( sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; } } + + if (link->type != dc_connection_mst_branch && + is_dp_active_dongle(link)) { + /* DP active dongles */ + link->type = dc_connection_active_dongle; + if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) { + /* + * active dongle unplug processing for short irq + */ + link_disconnect_sink(link); + return; + } + + if (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER) + *converter_disable_audio = true; + } } else { /* DP passive dongles */ sink_caps->signal = dp_passive_dongle_detection(link->ddc, -- cgit v1.2.3 From 1c72be981e8a18b4b4ad9d7fc4620bd19c2aba7f Mon Sep 17 00:00:00 2001 From: Andrew Jiang Date: Tue, 24 Oct 2017 12:00:55 -0400 Subject: drm/amd/display: Don't reject 3D timings Signed-off-by: Andrew Jiang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c index c7333cdf1802..fced178c8c79 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c @@ -496,9 +496,6 @@ static bool tgn10_validate_timing( timing->timing_3d_format != TIMING_3D_FORMAT_INBAND_FA) return false; - if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE && - tg->ctx->dc->debug.disable_stereo_support) - return false; /* Temporarily blocking interlacing mode until it's supported */ if (timing->flags.INTERLACE == 1) return false; -- cgit v1.2.3 From 116b2632ab033b2a755c1c7b703fede6860b3140 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 24 Oct 2017 17:57:38 -0400 Subject: drm/amd/display: fix split recout offset Previous recout calculation fix changed recout size rounding and affected the offset when it should not have Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ced339a145c6..1cd15d1cc3f4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -580,12 +580,12 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { + pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height / 2; /* Floor primary pipe, ceil 2ndary pipe */ pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; - pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; } else { + pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width / 2; pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; - pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width; } } else if (pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) { -- cgit v1.2.3 From 30ec2b9717c1c6616332f8ce7c0cb3dd72032a2e Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Fri, 3 Nov 2017 16:04:34 -0400 Subject: drm/amd/display: Check aux channel before MST resume It is to fix: MST display failed to resume from S3 At the beginning of resume from S3, need to check if mgr->aux is NULL. Fake MST encoder doesn't have real aux channel. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Roman Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 009aaeb263e3..5293604a894b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -520,7 +520,8 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { aconnector = to_amdgpu_dm_connector(connector); - if (aconnector->dc_link->type == dc_connection_mst_branch) { + if (aconnector->dc_link->type == dc_connection_mst_branch && + aconnector->mst_mgr.aux) { DRM_DEBUG_DRIVER("DM_MST: starting TM on aconnector: %p [id: %d]\n", aconnector, aconnector->base.base.id); -- cgit v1.2.3 From 16fb754a294d25f4077e1f475d930c22698f442a Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Fri, 10 Nov 2017 13:53:53 -0500 Subject: drm/amd/display: fix split viewport rounding error Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ++++ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 10 ++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3dce35e66b09..983987f7c983 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -890,6 +890,10 @@ bool dcn_validate_bandwidth( + pipe->bottom_pipe->plane_res.scl_data.recout.width; } + ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value + || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); + ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value + || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs( pipe->plane_state->format); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1cd15d1cc3f4..655e08df48a0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -516,13 +516,11 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split; if (right_view) { - data->viewport.width /= 2; - data->viewport_c.width /= 2; - data->viewport.x += data->viewport.width; - data->viewport_c.x += data->viewport_c.width; + data->viewport.x += data->viewport.width / 2; + data->viewport_c.x += data->viewport_c.width / 2; /* Ceil offset pipe */ - data->viewport.width += data->viewport.width % 2; - data->viewport_c.width += data->viewport_c.width % 2; + data->viewport.width = (data->viewport.width + 1) / 2; + data->viewport_c.width = (data->viewport_c.width + 1) / 2; } else { data->viewport.width /= 2; data->viewport_c.width /= 2; -- cgit v1.2.3 From 1b61973f9e0e7724bdc779ef3f9b55bd21b08db4 Mon Sep 17 00:00:00 2001 From: Jordan Lazare Date: Mon, 13 Nov 2017 17:57:33 -0400 Subject: drm/amd/display: Revert noisy assert messages This partially reverts commit 4fb48bb66211 ("dc: fix split viewport rounding error"). Signed-off-by: Jordan Lazare Reviewed-by: Dmytro Laktyushkin Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 983987f7c983..3dce35e66b09 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -890,10 +890,6 @@ bool dcn_validate_bandwidth( + pipe->bottom_pipe->plane_res.scl_data.recout.width; } - ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value - || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); - ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value - || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs( pipe->plane_state->format); -- cgit v1.2.3 From 8ffca5dca093cdd25264e782cc0c4539cb318925 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Fri, 10 Nov 2017 15:02:19 -0500 Subject: drm/amd/display: Do DC mode-change check when adding CRTCs Within atomic check, dm_update_crtcs_state is called twice. First to remove from the dc_state, and subsequently to add to it. In both calls, a secondary mode-change check is done using dc-level states. We shouldn't be doing this while removing, since a new dc_stream_state has not been created to do the necessary comparison. Because of this, the mode_changed flag within the DRM state can be mistakenly set to false. Doing so only when adding prevents this. We are also guaranteed that a call to add will come after remove, or else the atomic check fails (and a commit will not happen). Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5293604a894b..8b0fb25d84a6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4460,7 +4460,7 @@ static int dm_update_crtcs_state(struct dc *dc, } } - if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && + if (enable && dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { new_crtc_state->mode_changed = false; -- cgit v1.2.3 From 93984bbc70b3f321b8f6a3ec303e31b084e54230 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Mon, 20 Nov 2017 10:37:08 +0530 Subject: drm/amd/display: check plane state before validating fbc While validation fbc, array_mode of the pipe is accessed without checking plane_state exists for it. Causing to null pointer dereferencing followed by reboot when a crtc associated with external display(not connected) is page flipped. This patch adds a check for plane_state before using it to validate fbc. Signed-off-by: Shirish S Reviewed-by: Roman Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index be14a2665072..c3de7f305cd3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1774,6 +1774,10 @@ static enum dc_status validate_fbc(struct dc *dc, if (pipe_ctx->stream->sink->link->psr_enabled) return DC_ERROR_UNEXPECTED; + /* Nothing to compress */ + if (!pipe_ctx->plane_state) + return DC_ERROR_UNEXPECTED; + /* Only for non-linear tiling */ if (pipe_ctx->plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) return DC_ERROR_UNEXPECTED; -- cgit v1.2.3 From 2f2c3b36fc5a3dc9fd2cf22a2f368502a70eea5d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Nov 2017 16:47:35 +0000 Subject: drm/amd/display: fix memory leaks on error exit return Currently in the case where some of the allocations fail for dce110_tgv, dce110_xfmv, dce110_miv or dce110_oppv then the exit return path ends up leaking allocated objects. Fix this by kfree'ing them before returning. Also re-work the comparison of the null pointers to use the !ptr idiom. Detected by CoverityScan, CID#1460246, 1460325, 1460324, 1460392 ("Resource Leak") Fixes: c4562236b3bc ("drm/amd/dc: Add dc display driver (v2)") Signed-off-by: Colin Ian King Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index db96d2b47ff1..61adb8174ce0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1037,11 +1037,13 @@ static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) struct dce110_opp *dce110_oppv = kzalloc(sizeof(*dce110_oppv), GFP_KERNEL); - if ((dce110_tgv == NULL) || - (dce110_xfmv == NULL) || - (dce110_miv == NULL) || - (dce110_oppv == NULL)) - return false; + if (!dce110_tgv || !dce110_xfmv || !dce110_miv || !dce110_oppv) { + kfree(dce110_tgv); + kfree(dce110_xfmv); + kfree(dce110_miv); + kfree(dce110_oppv); + return false; + } dce110_opp_v_construct(dce110_oppv, ctx); -- cgit v1.2.3 From 2b7c97d687e81db07d8c67b32ff920e7bf59444e Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 15 Nov 2017 18:27:31 -0500 Subject: drm/amd/display: fix seq issue: turn on clock before programming afmt. Signed-off-by: Charlene Liu Reviewed-by: Krunoslav Kovac Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dce/dce_stream_encoder.c | 5 +++++ .../amd/display/dc/dce110/dce110_hw_sequencer.c | 22 ++++++++++------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 4fd49a16c3b6..c04d67db9cea 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -87,6 +87,11 @@ static void dce110_update_generic_info_packet( */ uint32_t max_retries = 50; + REG_GET(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, ®val); + /*we need turn on clock before programming AFMT block*/ + if (regval != 1) + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + if (REG(AFMT_VBI_PACKET_CONTROL1)) { if (packet_index >= 8) ASSERT(0); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index c3de7f305cd3..07ff8d2faf3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -991,6 +991,16 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) struct dc_link *link = stream->sink->link; struct dc *dc = pipe_ctx->stream->ctx->dc; + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( + pipe_ctx->stream_res.stream_enc); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( + pipe_ctx->stream_res.stream_enc); + + pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( + pipe_ctx->stream_res.stream_enc, true); if (pipe_ctx->stream_res.audio) { pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); @@ -1015,18 +1025,6 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) */ } - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( - pipe_ctx->stream_res.stream_enc); - - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( - pipe_ctx->stream_res.stream_enc); - - pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.stream_enc, true); - - /* blank at encoder level */ if (dc_is_dp_signal(pipe_ctx->stream->signal)) { if (pipe_ctx->stream->sink->link->connector_signal == SIGNAL_TYPE_EDP) -- cgit v1.2.3 From cfb071f7a9673109415d097125b3c12c16836acc Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 15 Nov 2017 18:55:57 -0500 Subject: drm/amd/display: try to find matching audio inst for enc inst first [Description] in eDP+ HDMI/DP clone or extended configuration, audio inst changed from inst 1 to inst0. No failure related this though, just playback device endpoint inst changed. Also remove one addition register read. Signed-off-by: Charlene Liu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 9 +++++++-- drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c | 4 +--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 655e08df48a0..546fe99d34f2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1449,11 +1449,16 @@ static struct stream_encoder *find_first_free_match_stream_enc_for_link( static struct audio *find_first_free_audio( struct resource_context *res_ctx, - const struct resource_pool *pool) + const struct resource_pool *pool, + enum engine_id id) { int i; for (i = 0; i < pool->audio_count; i++) { if ((res_ctx->is_audio_acquired[i] == false) && (res_ctx->is_stream_enc_acquired[i] == true)) { + /*we have enough audio endpoint, find the matching inst*/ + if (id != i) + continue; + return pool->audios[i]; } } @@ -1702,7 +1707,7 @@ enum dc_status resource_map_pool_resources( dc_is_audio_capable_signal(pipe_ctx->stream->signal) && stream->audio_info.mode_count) { pipe_ctx->stream_res.audio = find_first_free_audio( - &context->res_ctx, pool); + &context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id); /* * Audio assigned in order first come first get. diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index c04d67db9cea..e42b6eb1c1f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -87,10 +87,8 @@ static void dce110_update_generic_info_packet( */ uint32_t max_retries = 50; - REG_GET(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, ®val); /*we need turn on clock before programming AFMT block*/ - if (regval != 1) - REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); if (REG(AFMT_VBI_PACKET_CONTROL1)) { if (packet_index >= 8) -- cgit v1.2.3 From 70e8ffc55b98f31af700eae525fef5d0b8fcb6e1 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 11:19:02 -0500 Subject: drm/amd/display: Fix amdgpu_dm bugs found by smatch drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:2760 create_eml_sink() warn: variable dereferenced before check 'aconnector->base.edid_blob_ptr' (see line 2758) drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4270 amdgpu_dm_atomic_commit_tail() warn: variable dereferenced before check 'dm_new_crtc_state->stream' (see line 4266) drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4417 dm_restore_drm_connector_state() warn: variable dereferenced before check 'disconnected_acrtc' (see line 4415) Signed-off-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8b0fb25d84a6..5025dafb0517 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2709,7 +2709,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; - struct edid *edid = (struct edid *) aconnector->base.edid_blob_ptr->data; + struct edid *edid; if (!aconnector->base.edid_blob_ptr || !aconnector->base.edid_blob_ptr->data) { @@ -2721,6 +2721,8 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) return; } + edid = (struct edid *) aconnector->base.edid_blob_ptr->data; + aconnector->edid = edid; aconnector->dc_em_sink = dc_link_add_remote_sink( @@ -4198,13 +4200,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode, dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream); + if (!dm_new_crtc_state->stream) + continue; + status = dc_stream_get_status(dm_new_crtc_state->stream); WARN_ON(!status); WARN_ON(!status->plane_count); - if (!dm_new_crtc_state->stream) - continue; - /*TODO How it works with MPO ?*/ if (!dc_commit_planes_to_stream( dm->dc, @@ -4337,9 +4339,11 @@ void dm_restore_drm_connector_state(struct drm_device *dev, return; disconnected_acrtc = to_amdgpu_crtc(connector->encoder->crtc); - acrtc_state = to_dm_crtc_state(disconnected_acrtc->base.state); + if (!disconnected_acrtc) + return; - if (!disconnected_acrtc || !acrtc_state->stream) + acrtc_state = to_dm_crtc_state(disconnected_acrtc->base.state); + if (!acrtc_state->stream) return; /* -- cgit v1.2.3 From a8f9764731968d6a63f6f98c5b0d4e7a0e4154e2 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 11:56:15 -0500 Subject: drm/amd/display: Bunch of smatch error and warning fixes in DC drivers/gpu/drm/amd/amdgpu/../display/dc/basics/log_helpers.c:79 dc_conn_log() error: buffer overflow 'signal_type_info_tbl' 10 <= 10 drivers/gpu/drm/amd/amdgpu/../display/dc/bios/bios_parser.c:266 bios_parser_get_dst_obj() error: uninitialized symbol 'id'. drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_audio.c:357 dce_aud_az_enable() warn: inconsistent indenting drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:958 dcn10_acquire_idle_pipe_for_layer() error: we previously assumed 'head_pipe' could be null (see line 952) Signed-off-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/basics/log_helpers.c | 5 +++++ drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dce_audio.c | 10 +++++----- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c index 785b943b60ed..6e43168fbdd6 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c @@ -75,6 +75,9 @@ void dc_conn_log(struct dc_context *ctx, if (signal == signal_type_info_tbl[i].type) break; + if (i == NUM_ELEMENTS(signal_type_info_tbl)) + goto fail; + dm_logger_append(&entry, "[%s][ConnIdx:%d] ", signal_type_info_tbl[i].name, link->link_index); @@ -96,6 +99,8 @@ void dc_conn_log(struct dc_context *ctx, dm_logger_append(&entry, "^\n"); dm_helpers_dc_conn_log(ctx, &entry, event); + +fail: dm_logger_close(&entry); va_end(args); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index aaaebd06d7ee..86e6438c5cf3 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -249,7 +249,7 @@ static enum bp_result bios_parser_get_dst_obj(struct dc_bios *dcb, struct graphics_object_id *dest_object_id) { uint32_t number; - uint16_t *id; + uint16_t *id = NULL; ATOM_OBJECT *object; struct bios_parser *bp = BP_FROM_DCB(dcb); @@ -260,7 +260,7 @@ static enum bp_result bios_parser_get_dst_obj(struct dc_bios *dcb, number = get_dest_obj_list(bp, object, &id); - if (number <= index) + if (number <= index || !id) return BP_RESULT_BADINPUT; *dest_object_id = object_id_from_bios_object_id(id[index]); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 81c40f8864db..0df9ecb2710c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -352,11 +352,11 @@ void dce_aud_az_enable(struct audio *audio) uint32_t value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); set_reg_field_value(value, 1, - AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - CLOCK_GATING_DISABLE); - set_reg_field_value(value, 1, - AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - AUDIO_ENABLED); + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + CLOCK_GATING_DISABLE); + set_reg_field_value(value, 1, + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + AUDIO_ENABLED); AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 4c4bd72d4e40..9fc8f827f2a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -912,11 +912,13 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( struct pipe_ctx *head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream); struct pipe_ctx *idle_pipe = find_idle_secondary_pipe(res_ctx, pool); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!idle_pipe) - return false; + return NULL; idle_pipe->stream = head_pipe->stream; idle_pipe->stream_res.tg = head_pipe->stream_res.tg; -- cgit v1.2.3 From b001965d4eb5e4c0e3de7dc6dce1d45ca25232a7 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 12:11:44 -0500 Subject: drm/amd/display: Fix use before NULL check in validate_timing Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dce110/dce110_timing_generator.c:1124 dce110_timing_generator_validate_timing() warn: variable dereferenced before check 'timing' (see line 1116) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 67ac737eaa7e..4befce6cd87a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1112,10 +1112,7 @@ bool dce110_timing_generator_validate_timing( enum signal_type signal) { uint32_t h_blank; - uint32_t h_back_porch; - uint32_t hsync_offset = timing->h_border_right + - timing->h_front_porch; - uint32_t h_sync_start = timing->h_addressable + hsync_offset; + uint32_t h_back_porch, hsync_offset, h_sync_start; struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); @@ -1124,6 +1121,9 @@ bool dce110_timing_generator_validate_timing( if (!timing) return false; + hsync_offset = timing->h_border_right + timing->h_front_porch; + h_sync_start = timing->h_addressable + hsync_offset; + /* Currently we don't support 3D, so block all 3D timings */ if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE) return false; -- cgit v1.2.3 From b3fb2b4e21a995c4fa511627088bd55b88f6be11 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 19:49:44 -0500 Subject: drm/amd/display: Fix hubp check in set_cursor_position Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c:298 dc_stream_set_cursor_position() error: we previously assumed 'hubp' could be null (see line 294) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 9 ++++----- drivers/gpu/drm/amd/display/dc/inc/hw/transform.h | 7 ------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index b00a6040a697..e230cc44a0a7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -263,7 +263,6 @@ bool dc_stream_set_cursor_position( struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp; struct mem_input *mi = pipe_ctx->plane_res.mi; struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct transform *xfm = pipe_ctx->plane_res.xfm; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_cursor_position pos_cpy = *position; struct dc_cursor_mi_param param = { @@ -294,11 +293,11 @@ bool dc_stream_set_cursor_position( if (mi != NULL && mi->funcs->set_cursor_position != NULL) mi->funcs->set_cursor_position(mi, &pos_cpy, ¶m); - if (hubp != NULL && hubp->funcs->set_cursor_position != NULL) - hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); + if (!hubp) + continue; - if (xfm != NULL && xfm->funcs->set_cursor_position != NULL) - xfm->funcs->set_cursor_position(xfm, &pos_cpy, ¶m, hubp->curs_attr.width); + if (hubp->funcs->set_cursor_position != NULL) + hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); if (dpp != NULL && dpp->funcs->set_cursor_position != NULL) dpp->funcs->set_cursor_position(dpp, &pos_cpy, ¶m, hubp->curs_attr.width); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 7c08bc62c1f5..ea88997e1bbd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -259,13 +259,6 @@ struct transform_funcs { struct transform *xfm_base, const struct dc_cursor_attributes *attr); - void (*set_cursor_position)( - struct transform *xfm_base, - const struct dc_cursor_position *pos, - const struct dc_cursor_mi_param *param, - uint32_t width - ); - }; const uint16_t *get_filter_2tap_16p(void); -- cgit v1.2.3 From edf38b58ecd847df75723fb90d4a1c3669b1e670 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 20:11:37 -0500 Subject: drm/amd/display: Fix potential NULL and mem leak in create_links Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:148 create_links() error: potential null dereference 'link->link_enc'. (kzalloc returns null) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 15625fd94455..7240db2e6f09 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -121,6 +121,10 @@ static bool create_links( goto failed_alloc; } + link->link_index = dc->link_count; + dc->links[dc->link_count] = link; + dc->link_count++; + link->ctx = dc->ctx; link->dc = dc; link->connector_signal = SIGNAL_TYPE_VIRTUAL; @@ -128,6 +132,12 @@ static bool create_links( link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); + + if (!link->link_enc) { + BREAK_TO_DEBUGGER(); + goto failed_alloc; + } + link->link_status.dpcd_caps = &link->dpcd_caps; enc_init.ctx = dc->ctx; @@ -139,10 +149,6 @@ static bool create_links( enc_init.encoder.id = ENCODER_ID_INTERNAL_VIRTUAL; enc_init.encoder.enum_id = ENUM_ID_1; virtual_link_encoder_construct(link->link_enc, &enc_init); - - link->link_index = dc->link_count; - dc->links[dc->link_count] = link; - dc->link_count++; } return true; -- cgit v1.2.3 From e41ab0309a1b75d31c360d1d2b0de58e8d7958ad Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 20:35:27 -0500 Subject: drm/amd/display: Fix couple more inconsistent NULL checks in dc_resource Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:1001 acquire_free_pipe_for_stream() error: we previously assumed 'head_pipe' could be null (see line 998) drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:1808 dc_validate_global_state() error: we previously assumed 'new_ctx' could be null (see line 1778) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 546fe99d34f2..b7422d3b71ef 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -993,8 +993,10 @@ static struct pipe_ctx *acquire_free_pipe_for_stream( head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!head_pipe->plane_state) return head_pipe; @@ -1772,13 +1774,16 @@ enum dc_status dc_validate_global_state( enum dc_status result = DC_ERROR_UNEXPECTED; int i, j; + if (!new_ctx) + return DC_ERROR_UNEXPECTED; + if (dc->res_pool->funcs->validate_global) { result = dc->res_pool->funcs->validate_global(dc, new_ctx); if (result != DC_OK) return result; } - for (i = 0; new_ctx && i < new_ctx->stream_count; i++) { + for (i = 0; i < new_ctx->stream_count; i++) { struct dc_stream_state *stream = new_ctx->streams[i]; for (j = 0; j < dc->res_pool->pipe_count; j++) { -- cgit v1.2.3 From 0a24bfcb2cc2cd161a0c8aae8fcbb58239d5da2b Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Thu, 16 Nov 2017 15:17:27 -0500 Subject: drm/amd/display: Do not put drm_atomic_state on resume drm_atomic_helper_resume now puts it for us. See relevant patch here: https://lists.freedesktop.org/archives/dri-devel/2017-October/154268.html Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5025dafb0517..3d6fbc38d95f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -716,7 +716,6 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) ret = drm_atomic_helper_resume(ddev, adev->dm.cached_state); - drm_atomic_state_put(adev->dm.cached_state); adev->dm.cached_state = NULL; amdgpu_dm_irq_resume_late(adev); -- cgit v1.2.3 From d5c9cb6e00047b194d54cadc1e91156f2875a3e5 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 16 Nov 2017 17:22:39 -0500 Subject: drm/amd/display: fix gamma setting Adding gamma changed check as condition for affected plane. We ignored adding plane as affected if modeset was not required. But for color management change we still need it. Signed-off-by: Roman Li Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3d6fbc38d95f..8d94dfc7ba3f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4717,7 +4717,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } } else { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) + if (!drm_atomic_crtc_needs_modeset(new_crtc_state) && + !new_crtc_state->color_mgmt_changed) continue; if (!new_crtc_state->enable) -- cgit v1.2.3 From 320a127437e5d3cbb7fc444f8769eb510d11d3b9 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 14 Nov 2017 20:45:52 -0500 Subject: drm/amd/display: Switch to drm_atomic_helper_wait_for_flip_done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new helper function is advised to be used for drviers that use the nonblocking commit tracking support instead of drm_atomic_helper_wait_for_vblanks. Signed-off-by: Andrey Grodzovsky Reviewed-by: Harry Wentland Reviewed-and-Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8d94dfc7ba3f..f71fe6d2ddda 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4259,7 +4259,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_commit_hw_done(state); if (wait_for_vblank) - drm_atomic_helper_wait_for_vblanks(dev, state); + drm_atomic_helper_wait_for_flip_done(dev, state); drm_atomic_helper_cleanup_planes(dev, state); } -- cgit v1.2.3 From 97011249c3b2ba6b038a2af7025063d62d1448ec Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Mon, 20 Nov 2017 12:45:54 -0500 Subject: drm/amd/display: USB-C / thunderbolt dock specific workaround reading dpcd 0x600 cause link loss for a particular USB-C dock with thurderbolt. workaround by avoiding dcpd 0x600 read unless it's necessary. Signed-off-by: Hersen Wu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 85 +++++++++++------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 8e97b42a03a2..e6bf05d76a94 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1512,7 +1512,7 @@ static bool hpd_rx_irq_check_link_loss_status( struct dc_link *link, union hpd_irq_data *hpd_irq_dpcd_data) { - uint8_t irq_reg_rx_power_state; + uint8_t irq_reg_rx_power_state = 0; enum dc_status dpcd_result = DC_ERROR_UNEXPECTED; union lane_status lane_status; uint32_t lane; @@ -1524,60 +1524,55 @@ static bool hpd_rx_irq_check_link_loss_status( if (link->cur_link_settings.lane_count == 0) return return_code; - /*1. Check that we can handle interrupt: Not in FS DOS, - * Not in "Display Timeout" state, Link is trained. - */ - dpcd_result = core_link_read_dpcd(link, - DP_SET_POWER, - &irq_reg_rx_power_state, - sizeof(irq_reg_rx_power_state)); + /*1. Check that Link Status changed, before re-training.*/ - if (dpcd_result != DC_OK) { - irq_reg_rx_power_state = DP_SET_POWER_D0; - dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, - "%s: DPCD read failed to obtain power state.\n", - __func__); + /*parse lane status*/ + for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) { + /* check status of lanes 0,1 + * changed DpcdAddress_Lane01Status (0x202) + */ + lane_status.raw = get_nibble_at_index( + &hpd_irq_dpcd_data->bytes.lane01_status.raw, + lane); + + if (!lane_status.bits.CHANNEL_EQ_DONE_0 || + !lane_status.bits.CR_DONE_0 || + !lane_status.bits.SYMBOL_LOCKED_0) { + /* if one of the channel equalization, clock + * recovery or symbol lock is dropped + * consider it as (link has been + * dropped) dp sink status has changed + */ + sink_status_changed = true; + break; + } } - if (irq_reg_rx_power_state == DP_SET_POWER_D0) { + /* Check interlane align.*/ + if (sink_status_changed || + !hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) { - /*2. Check that Link Status changed, before re-training.*/ - - /*parse lane status*/ - for (lane = 0; - lane < link->cur_link_settings.lane_count; - lane++) { - - /* check status of lanes 0,1 - * changed DpcdAddress_Lane01Status (0x202)*/ - lane_status.raw = get_nibble_at_index( - &hpd_irq_dpcd_data->bytes.lane01_status.raw, - lane); - - if (!lane_status.bits.CHANNEL_EQ_DONE_0 || - !lane_status.bits.CR_DONE_0 || - !lane_status.bits.SYMBOL_LOCKED_0) { - /* if one of the channel equalization, clock - * recovery or symbol lock is dropped - * consider it as (link has been - * dropped) dp sink status has changed*/ - sink_status_changed = true; - break; - } + dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, + "%s: Link Status changed.\n", __func__); - } + return_code = true; - /* Check interlane align.*/ - if (sink_status_changed || - !hpd_irq_dpcd_data->bytes.lane_status_updated.bits. - INTERLANE_ALIGN_DONE) { + /*2. Check that we can handle interrupt: Not in FS DOS, + * Not in "Display Timeout" state, Link is trained. + */ + dpcd_result = core_link_read_dpcd(link, + DP_SET_POWER, + &irq_reg_rx_power_state, + sizeof(irq_reg_rx_power_state)); + if (dpcd_result != DC_OK) { dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, - "%s: Link Status changed.\n", + "%s: DPCD read failed to obtain power state.\n", __func__); - - return_code = true; + } else { + if (irq_reg_rx_power_state != DP_SET_POWER_D0) + return_code = false; } } -- cgit v1.2.3 From 553d8e8b107159088cc4e2855a2bd9a358365e3f Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 23 Nov 2017 10:55:24 +1100 Subject: docs: correct documentation for %pK Current documentation indicates that %pK prints a leading '0x'. This is not the case. Correct documentation for printk specifier %pK. Signed-off-by: Tobin C. Harding --- Documentation/printk-formats.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 361789df51ec..71b62db7eca2 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -85,13 +85,12 @@ Examples:: printk("Faulted at %pS\n", (void *)regs->ip); printk(" %s%pB\n", (reliable ? "" : "? "), (void *)*stack); - Kernel Pointers =============== :: - %pK 0x01234567 or 0x0123456789abcdef + %pK 01234567 or 0123456789abcdef For printing kernel pointers which should be hidden from unprivileged users. The behaviour of ``%pK`` depends on the ``kptr_restrict sysctl`` - see -- cgit v1.2.3 From 57e734423adda83f3b05505875343284efe3b39c Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 23 Nov 2017 10:56:39 +1100 Subject: vsprintf: refactor %pK code out of pointer() Currently code to handle %pK is all within the switch statement in pointer(). This is the wrong level of abstraction. Each of the other switch clauses call a helper function, pK should do the same. Refactor code out of pointer() to new function restricted_pointer(). Signed-off-by: Tobin C. Harding --- lib/vsprintf.c | 97 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 1746bae94d41..8dc5cf85cef4 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1343,6 +1343,59 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } +int kptr_restrict __read_mostly; + +static noinline_for_stack +char *restricted_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + spec.base = 16; + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = 2 * sizeof(ptr); + spec.flags |= ZEROPAD; + } + + switch (kptr_restrict) { + case 0: + /* Always print %pK values */ + break; + case 1: { + const struct cred *cred; + + /* + * kptr_restrict==1 cannot be used in IRQ context + * because its test for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) + return string(buf, end, "pK-error", spec); + + /* + * Only print the real pointer value if the current + * process has CAP_SYSLOG and is running with the + * same credentials it started with. This is because + * access to files is checked at open() time, but %pK + * checks permission at read() time. We don't want to + * leak pointer values if a binary opens a file using + * %pK and then elevates privileges before reading it. + */ + cred = current_cred(); + if (!has_capability_noaudit(current, CAP_SYSLOG) || + !uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)) + ptr = NULL; + break; + } + case 2: + default: + /* Always print 0's for %pK */ + ptr = NULL; + break; + } + + return number(buf, end, (unsigned long)ptr, spec); +} + static noinline_for_stack char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt) { @@ -1591,8 +1644,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } -int kptr_restrict __read_mostly; - /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -1792,47 +1843,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': - switch (kptr_restrict) { - case 0: - /* Always print %pK values */ - break; - case 1: { - const struct cred *cred; - - /* - * kptr_restrict==1 cannot be used in IRQ context - * because its test for CAP_SYSLOG would be meaningless. - */ - if (in_irq() || in_serving_softirq() || in_nmi()) { - if (spec.field_width == -1) - spec.field_width = default_width; - return string(buf, end, "pK-error", spec); - } - - /* - * Only print the real pointer value if the current - * process has CAP_SYSLOG and is running with the - * same credentials it started with. This is because - * access to files is checked at open() time, but %pK - * checks permission at read() time. We don't want to - * leak pointer values if a binary opens a file using - * %pK and then elevates privileges before reading it. - */ - cred = current_cred(); - if (!has_capability_noaudit(current, CAP_SYSLOG) || - !uid_eq(cred->euid, cred->uid) || - !gid_eq(cred->egid, cred->gid)) - ptr = NULL; - break; - } - case 2: - default: - /* Always print 0's for %pK */ - ptr = NULL; - break; - } - break; - + return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, fmt); case 'a': -- cgit v1.2.3 From ad67b74d2469d9b82aaa572d76474c95bc484d57 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 1 Nov 2017 15:32:23 +1100 Subject: printk: hash addresses printed with %p Currently there exist approximately 14 000 places in the kernel where addresses are being printed using an unadorned %p. This potentially leaks sensitive information regarding the Kernel layout in memory. Many of these calls are stale, instead of fixing every call lets hash the address by default before printing. This will of course break some users, forcing code printing needed addresses to be updated. Code that _really_ needs the address will soon be able to use the new printk specifier %px to print the address. For what it's worth, usage of unadorned %p can be broken down as follows (thanks to Joe Perches). $ git grep -E '%p[^A-Za-z0-9]' | cut -f1 -d"/" | sort | uniq -c 1084 arch 20 block 10 crypto 32 Documentation 8121 drivers 1221 fs 143 include 101 kernel 69 lib 100 mm 1510 net 40 samples 7 scripts 11 security 166 sound 152 tools 2 virt Add function ptr_to_id() to map an address to a 32 bit unique identifier. Hash any unadorned usage of specifier %p and any malformed specifiers. Signed-off-by: Tobin C. Harding --- Documentation/printk-formats.txt | 12 ++++- lib/test_printf.c | 108 +++++++++++++++++++++++++-------------- lib/vsprintf.c | 81 ++++++++++++++++++++++++++--- 3 files changed, 155 insertions(+), 46 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 71b62db7eca2..b4e668ac4fe3 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -5,7 +5,6 @@ How to get printk format specifiers right :Author: Randy Dunlap :Author: Andrew Murray - Integer types ============= @@ -45,6 +44,17 @@ return from vsnprintf. Raw pointer value SHOULD be printed with %p. The kernel supports the following extended format specifiers for pointer types: +Pointer Types +============= + +Pointers printed without a specifier extension (i.e unadorned %p) are +hashed to give a unique identifier without leaking kernel addresses to user +space. On 64 bit machines the first 32 bits are zeroed. + +:: + + %p abcdef12 or 00000000abcdef12 + Symbols/Function Pointers ========================= diff --git a/lib/test_printf.c b/lib/test_printf.c index 563f10e6876a..71ebfa43ad05 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -24,24 +24,6 @@ #define PAD_SIZE 16 #define FILL_CHAR '$' -#define PTR1 ((void*)0x01234567) -#define PTR2 ((void*)(long)(int)0xfedcba98) - -#if BITS_PER_LONG == 64 -#define PTR1_ZEROES "000000000" -#define PTR1_SPACES " " -#define PTR1_STR "1234567" -#define PTR2_STR "fffffffffedcba98" -#define PTR_WIDTH 16 -#else -#define PTR1_ZEROES "0" -#define PTR1_SPACES " " -#define PTR1_STR "1234567" -#define PTR2_STR "fedcba98" -#define PTR_WIDTH 8 -#endif -#define PTR_WIDTH_STR stringify(PTR_WIDTH) - static unsigned total_tests __initdata; static unsigned failed_tests __initdata; static char *test_buffer __initdata; @@ -217,30 +199,79 @@ test_string(void) test("a | | ", "%-3.s|%-3.0s|%-3.*s", "a", "b", 0, "c"); } +#define PLAIN_BUF_SIZE 64 /* leave some space so we don't oops */ + +#if BITS_PER_LONG == 64 + +#define PTR_WIDTH 16 +#define PTR ((void *)0xffff0123456789ab) +#define PTR_STR "ffff0123456789ab" +#define ZEROS "00000000" /* hex 32 zero bits */ + +static int __init +plain_format(void) +{ + char buf[PLAIN_BUF_SIZE]; + int nchars; + + nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); + + if (nchars != PTR_WIDTH || strncmp(buf, ZEROS, strlen(ZEROS)) != 0) + return -1; + + return 0; +} + +#else + +#define PTR_WIDTH 8 +#define PTR ((void *)0x456789ab) +#define PTR_STR "456789ab" + +static int __init +plain_format(void) +{ + /* Format is implicitly tested for 32 bit machines by plain_hash() */ + return 0; +} + +#endif /* BITS_PER_LONG == 64 */ + +static int __init +plain_hash(void) +{ + char buf[PLAIN_BUF_SIZE]; + int nchars; + + nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); + + if (nchars != PTR_WIDTH || strncmp(buf, PTR_STR, PTR_WIDTH) == 0) + return -1; + + return 0; +} + +/* + * We can't use test() to test %p because we don't know what output to expect + * after an address is hashed. + */ static void __init plain(void) { - test(PTR1_ZEROES PTR1_STR " " PTR2_STR, "%p %p", PTR1, PTR2); - /* - * The field width is overloaded for some %p extensions to - * pass another piece of information. For plain pointers, the - * behaviour is slightly odd: One cannot pass either the 0 - * flag nor a precision to %p without gcc complaining, and if - * one explicitly gives a field width, the number is no longer - * zero-padded. - */ - test("|" PTR1_STR PTR1_SPACES " | " PTR1_SPACES PTR1_STR "|", - "|%-*p|%*p|", PTR_WIDTH+2, PTR1, PTR_WIDTH+2, PTR1); - test("|" PTR2_STR " | " PTR2_STR "|", - "|%-*p|%*p|", PTR_WIDTH+2, PTR2, PTR_WIDTH+2, PTR2); + int err; - /* - * Unrecognized %p extensions are treated as plain %p, but the - * alphanumeric suffix is ignored (that is, does not occur in - * the output.) - */ - test("|"PTR1_ZEROES PTR1_STR"|", "|%p0y|", PTR1); - test("|"PTR2_STR"|", "|%p0y|", PTR2); + err = plain_hash(); + if (err) { + pr_warn("plain 'p' does not appear to be hashed\n"); + failed_tests++; + return; + } + + err = plain_format(); + if (err) { + pr_warn("hashing plain 'p' has unexpected format\n"); + failed_tests++; + } } static void __init @@ -251,6 +282,7 @@ symbol_ptr(void) static void __init kernel_ptr(void) { + /* We can't test this without access to kptr_restrict. */ } static void __init diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8dc5cf85cef4..d69452a0f2fa 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #ifdef CONFIG_BLOCK #include #endif @@ -1644,6 +1646,73 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +static bool have_filled_random_ptr_key __read_mostly; +static siphash_key_t ptr_key __read_mostly; + +static void fill_random_ptr_key(struct random_ready_callback *unused) +{ + get_random_bytes(&ptr_key, sizeof(ptr_key)); + /* + * have_filled_random_ptr_key==true is dependent on get_random_bytes(). + * ptr_to_id() needs to see have_filled_random_ptr_key==true + * after get_random_bytes() returns. + */ + smp_mb(); + WRITE_ONCE(have_filled_random_ptr_key, true); +} + +static struct random_ready_callback random_ready = { + .func = fill_random_ptr_key +}; + +static int __init initialize_ptr_random(void) +{ + int ret = add_random_ready_callback(&random_ready); + + if (!ret) { + return 0; + } else if (ret == -EALREADY) { + fill_random_ptr_key(&random_ready); + return 0; + } + + return ret; +} +early_initcall(initialize_ptr_random); + +/* Maps a pointer to a 32 bit unique identifier. */ +static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) +{ + unsigned long hashval; + const int default_width = 2 * sizeof(ptr); + + if (unlikely(!have_filled_random_ptr_key)) { + spec.field_width = default_width; + /* string length must be less than default_width */ + return string(buf, end, "(ptrval)", spec); + } + +#ifdef CONFIG_64BIT + hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); + /* + * Mask off the first 32 bits, this makes explicit that we have + * modified the address (and 32 bits is plenty for a unique ID). + */ + hashval = hashval & 0xffffffff; +#else + hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key); +#endif + + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = default_width; + spec.flags |= ZEROPAD; + } + spec.base = 16; + + return number(buf, end, hashval, spec); +} + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -1754,6 +1823,9 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a * pointer to the real address. + * + * Note: The default behaviour (unadorned %p) is to hash the address, + * rendering it useful as a unique identifier. */ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, @@ -1869,14 +1941,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); } } - spec.flags |= SMALL; - if (spec.field_width == -1) { - spec.field_width = default_width; - spec.flags |= ZEROPAD; - } - spec.base = 16; - return number(buf, end, (unsigned long) ptr, spec); + /* default is to _not_ leak addresses, hash before printing */ + return ptr_to_id(buf, end, ptr, spec); } /* -- cgit v1.2.3 From 7b1924a1d930eb27fc79c4e4e2a6c1c970623e68 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 23 Nov 2017 10:59:45 +1100 Subject: vsprintf: add printk specifier %px printk specifier %p now hashes all addresses before printing. Sometimes we need to see the actual unmodified address. This can be achieved using %lx but then we face the risk that if in future we want to change the way the Kernel handles printing of pointers we will have to grep through the already existent 50 000 %lx call sites. Let's add specifier %px as a clear, opt-in, way to print a pointer and maintain some level of isolation from all the other hex integer output within the Kernel. Add printk specifier %px to print the actual unmodified address. Signed-off-by: Tobin C. Harding --- Documentation/printk-formats.txt | 18 +++++++++++++++++- lib/vsprintf.c | 18 ++++++++++++++++++ scripts/checkpatch.pl | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index b4e668ac4fe3..aa0a776c817a 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -49,7 +49,8 @@ Pointer Types Pointers printed without a specifier extension (i.e unadorned %p) are hashed to give a unique identifier without leaking kernel addresses to user -space. On 64 bit machines the first 32 bits are zeroed. +space. On 64 bit machines the first 32 bits are zeroed. If you _really_ +want the address see %px below. :: @@ -106,6 +107,21 @@ For printing kernel pointers which should be hidden from unprivileged users. The behaviour of ``%pK`` depends on the ``kptr_restrict sysctl`` - see Documentation/sysctl/kernel.txt for more details. +Unmodified Addresses +==================== + +:: + + %px 01234567 or 0123456789abcdef + +For printing pointers when you _really_ want to print the address. Please +consider whether or not you are leaking sensitive information about the +Kernel layout in memory before printing pointers with %px. %px is +functionally equivalent to %lx. %px is preferred to %lx because it is more +uniquely grep'able. If, in the future, we need to modify the way the Kernel +handles printing pointers it will be nice to be able to find the call +sites. + Struct Resources ================ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d69452a0f2fa..d960aead0336 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1646,6 +1646,20 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +static noinline_for_stack +char *pointer_string(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + spec.base = 16; + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = 2 * sizeof(ptr); + spec.flags |= ZEROPAD; + } + + return number(buf, end, (unsigned long int)ptr, spec); +} + static bool have_filled_random_ptr_key __read_mostly; static siphash_key_t ptr_key __read_mostly; @@ -1818,6 +1832,8 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) * c major compatible string * C full compatible string * + * - 'x' For printing the address. Equivalent to "%lx". + * * ** Please update also Documentation/printk-formats.txt when making changes ** * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 @@ -1940,6 +1956,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'F': return device_node_string(buf, end, ptr, spec, fmt + 1); } + case 'x': + return pointer_string(buf, end, ptr, spec); } /* default is to _not_ leak addresses, hash before printing */ diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 95cda3ecc66b..040aa79e1d9d 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5753,7 +5753,7 @@ sub process { for (my $count = $linenr; $count <= $lc; $count++) { my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0)); $fmt =~ s/%%//g; - if ($fmt =~ /(\%[\*\d\.]*p(?![\WFfSsBKRraEhMmIiUDdgVCbGNO]).)/) { + if ($fmt =~ /(\%[\*\d\.]*p(?![\WFfSsBKRraEhMmIiUDdgVCbGNOx]).)/) { $bad_extension = $1; last; } -- cgit v1.2.3 From 6424f6bb432752c7eb90cbeeb1c31d6125bba39a Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 1 Nov 2017 15:32:22 +1100 Subject: kasan: use %px to print addresses instead of %p Pointers printed with %p are now hashed by default. Kasan needs the actual address. We can use the new printk specifier %px for this purpose. Use %px instead of %p to print addresses. Signed-off-by: Tobin C. Harding --- mm/kasan/report.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 6bcfb01ba038..410c8235e671 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -134,7 +134,7 @@ static void print_error_description(struct kasan_access_info *info) pr_err("BUG: KASAN: %s in %pS\n", bug_type, (void *)info->ip); - pr_err("%s of size %zu at addr %p by task %s/%d\n", + pr_err("%s of size %zu at addr %px by task %s/%d\n", info->is_write ? "Write" : "Read", info->access_size, info->access_addr, current->comm, task_pid_nr(current)); } @@ -206,7 +206,7 @@ static void describe_object_addr(struct kmem_cache *cache, void *object, const char *rel_type; int rel_bytes; - pr_err("The buggy address belongs to the object at %p\n" + pr_err("The buggy address belongs to the object at %px\n" " which belongs to the cache %s of size %d\n", object, cache->name, cache->object_size); @@ -225,7 +225,7 @@ static void describe_object_addr(struct kmem_cache *cache, void *object, } pr_err("The buggy address is located %d bytes %s of\n" - " %d-byte region [%p, %p)\n", + " %d-byte region [%px, %px)\n", rel_bytes, rel_type, cache->object_size, (void *)object_addr, (void *)(object_addr + cache->object_size)); } @@ -302,7 +302,7 @@ static void print_shadow_for_address(const void *addr) char shadow_buf[SHADOW_BYTES_PER_ROW]; snprintf(buffer, sizeof(buffer), - (i == 0) ? ">%p: " : " %p: ", kaddr); + (i == 0) ? ">%px: " : " %px: ", kaddr); /* * We should not pass a shadow pointer to generic * function, because generic functions may try to -- cgit v1.2.3 From aca7573fde95152378361cba734996b384f3b1d3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 28 Nov 2017 08:23:04 +0530 Subject: powerpc: Avoid signed to unsigned conversion in set_thread_tidr() There is an unsafe signed to unsigned conversion in set_thread_tidr() that may cause an error value to be assigned to SPRN_TIDR register and used as thread-id. The issue happens as assign_thread_tidr() returns an int and thread.tidr is an unsigned-long. So a negative error code returned from assign_thread_tidr() will fail the error check and gets assigned as tidr as a large positive value. To fix this the patch assigns the return value of assign_thread_tidr() to a temporary int and assigns it to thread.tidr iff its '> 0'. The patch shouldn't impact the calling convention of set_thread_tidr() i.e all -ve return-values are error codes and a return value of '0' indicates success. Fixes: ec233ede4c86("powerpc: Add support for setting SPRN_TIDR") Signed-off-by: Vaibhav Jain Reviewed-by: Christophe Lombard clombard@linux.vnet.ibm.com Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bfdd783e3916..d205b52e3850 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1569,16 +1569,19 @@ void arch_release_task_struct(struct task_struct *t) */ int set_thread_tidr(struct task_struct *t) { + int rc; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -EINVAL; if (t != current) return -EINVAL; - t->thread.tidr = assign_thread_tidr(); - if (t->thread.tidr < 0) - return t->thread.tidr; + rc = assign_thread_tidr(); + if (rc < 0) + return rc; + t->thread.tidr = rc; mtspr(SPRN_TIDR, t->thread.tidr); return 0; -- cgit v1.2.3 From 7e4d4233260be0611c7fbdb2730c12731c4b8dc0 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 24 Nov 2017 14:03:38 +0530 Subject: powerpc: Do not assign thread.tidr if already assigned If set_thread_tidr() is called twice for same task_struct then it will allocate a new tidr value to it leaving the previous value still dangling in the vas_thread_ida table. To fix this the patch changes set_thread_tidr() to check if a tidr value is already assigned to the task_struct and if yes then returns zero. Fixes: ec233ede4c86("powerpc: Add support for setting SPRN_TIDR") Signed-off-by: Vaibhav Jain Reviewed-by: Andrew Donnellan [mpe: Modify to return 0 in the success case, not the TID value] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d205b52e3850..5acb5a176dbe 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1577,6 +1577,9 @@ int set_thread_tidr(struct task_struct *t) if (t != current) return -EINVAL; + if (t->thread.tidr) + return 0; + rc = assign_thread_tidr(); if (rc < 0) return rc; -- cgit v1.2.3 From 4bd6683da2e64590bdc27ecf7e61ad8376861768 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Nov 2017 19:55:18 +0200 Subject: pinctrl: denverton: Fix UART2 RTS pin mode UART2 RTS is mode 2 of the pin. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-denverton.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-denverton.c b/drivers/pinctrl/intel/pinctrl-denverton.c index 4500880240f2..6572550cfe78 100644 --- a/drivers/pinctrl/intel/pinctrl-denverton.c +++ b/drivers/pinctrl/intel/pinctrl-denverton.c @@ -207,7 +207,7 @@ static const unsigned int dnv_uart0_pins[] = { 60, 61, 64, 65 }; static const unsigned int dnv_uart0_modes[] = { 2, 3, 1, 1 }; static const unsigned int dnv_uart1_pins[] = { 94, 95, 96, 97 }; static const unsigned int dnv_uart2_pins[] = { 60, 61, 62, 63 }; -static const unsigned int dnv_uart2_modes[] = { 1, 1, 2, 2 }; +static const unsigned int dnv_uart2_modes[] = { 1, 2, 2, 2 }; static const unsigned int dnv_emmc_pins[] = { 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, }; -- cgit v1.2.3 From 80a780a167d9267c72867b806142bd6ec69ba123 Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Tue, 28 Nov 2017 09:24:06 +0100 Subject: mmc: core: prepend 0x to pre_eol_info entry in sysfs The sysfs entry "pre_eol_info" was missing the 0x prefix to identify it as hex formatted. Fixes: 46bc5c408e4e ("mmc: core: Export device lifetime information through sysfs") Signed-off-by: Bastian Stender Cc: # v4.11+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a552f61060d2..b8259fcb4bda 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -781,7 +781,7 @@ MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(prv, "0x%x\n", card->cid.prv); MMC_DEV_ATTR(rev, "0x%x\n", card->ext_csd.rev); -MMC_DEV_ATTR(pre_eol_info, "%02x\n", card->ext_csd.pre_eol_info); +MMC_DEV_ATTR(pre_eol_info, "0x%02x\n", card->ext_csd.pre_eol_info); MMC_DEV_ATTR(life_time, "0x%02x 0x%02x\n", card->ext_csd.device_life_time_est_typ_a, card->ext_csd.device_life_time_est_typ_b); -- cgit v1.2.3 From c892b0d81705c566f575e489efc3c50762db1bde Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Tue, 28 Nov 2017 09:24:07 +0100 Subject: mmc: core: prepend 0x to OCR entry in sysfs The sysfs entry "ocr" was missing the 0x prefix to identify it as hex formatted. Fixes: 5fb06af7a33b ("mmc: core: Extend sysfs with OCR register") Signed-off-by: Bastian Stender Cc: # v4.8+ [Ulf: Amended change to also cover SD-cards] Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- drivers/mmc/core/sd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index b8259fcb4bda..d209fb466979 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -791,7 +791,7 @@ MMC_DEV_ATTR(enhanced_area_offset, "%llu\n", MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size); MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult); MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); MMC_DEV_ATTR(cmdq_en, "%d\n", card->ext_csd.cmdq_en); static ssize_t mmc_fwrev_show(struct device *dev, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 45bf78f32716..62b84dd8f9fe 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -675,7 +675,7 @@ MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); static ssize_t mmc_dsr_show(struct device *dev, -- cgit v1.2.3 From 6702abb3bf2394f250af0ee04070227bb5dda788 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 14 Nov 2017 17:51:50 +0100 Subject: pinctrl: armada-37xx: Fix direction_output() callback behavior The direction_output callback of the gpio_chip structure is supposed to set the output direction but also to set the value of the gpio. For the armada-37xx driver this callback acted as the gpio_set_direction callback for the pinctrl. This patch fixes the behavior of the direction_output callback by also applying the value received as parameter. Cc: stable@vger.kernel.org Fixes: 5715092a458c ("pinctrl: armada-37xx: Add gpio support") Reported-by: Alexandre Belloni Signed-off-by: Gregory CLEMENT Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index d45af31b86b4..bdb8d174efef 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -408,12 +408,21 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, { struct armada_37xx_pinctrl *info = gpiochip_get_data(chip); unsigned int reg = OUTPUT_EN; - unsigned int mask; + unsigned int mask, val, ret; armada_37xx_update_reg(®, offset); mask = BIT(offset); - return regmap_update_bits(info->regmap, reg, mask, mask); + ret = regmap_update_bits(info->regmap, reg, mask, mask); + + if (ret) + return ret; + + reg = OUTPUT_VAL; + val = value ? mask : 0; + regmap_update_bits(info->regmap, reg, mask, val); + + return 0; } static int armada_37xx_gpio_get(struct gpio_chip *chip, unsigned int offset) -- cgit v1.2.3 From a76dfe350c1fcb466b18595e65b33fa6abf65472 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 29 Nov 2017 12:04:31 +0100 Subject: drm/atomic: make drm_atomic_helper_wait_for_vblanks more agressive drm_atomic_helper_setup_commit expects that flipping of previous commits has happened when it is called to set up a new commit. This can be violated by commits where userspace doesn't get a flip completion event to synchronize against i.e. legacy modesets and property changes. The expectation is that those are done by blocking commits, which wait for completion. Most drivers call drm_atomic_helper_wait_for_vblanks in the commit_tail to ensure completion, but the wait for next vblank might not actually happen if the commit didn't change any planes. Make the wait more agressive by also waiting if no planes changed. This is the minimal regression fix for the 4.15 kernel series. Long term drivers should switch away from drm_atomic_helper_wait_for_vblanks and use drm_atomic_helper_wait_for_flip_done instead. Fixes: de39bec1a0c4 ("drm/atomic: Remove waits in drm_atomic_helper_commit_cleanup_done, v2.") Signed-off-by: Lucas Stach Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171129110431.6300-1-l.stach@pengutronix.de --- drivers/gpu/drm/drm_atomic_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 71d712f1b56a..b16f1d69a0bb 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1225,7 +1225,7 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, return; for_each_oldnew_crtc_in_state(old_state, crtc, old_crtc_state, new_crtc_state, i) { - if (!new_crtc_state->active || !new_crtc_state->planes_changed) + if (!new_crtc_state->active) continue; ret = drm_crtc_vblank_get(crtc); -- cgit v1.2.3 From 5478e478eee3b096b8d998d4ed445da30da2dfbc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 Nov 2017 22:06:13 +0100 Subject: eeprom: at24: correctly set the size for at24mac402 There's an ilog2() expansion in AT24_DEVICE_MAGIC() which rounds down the actual size of EUI-48 byte array in at24mac402 eeproms to 4 from 6, making it impossible to read it all. Fix it by manually adjusting the value in probe(). This patch contains a temporary fix that is suitable for stable branches. Eventually we'll probably remove the call to ilog2() while converting the magic values to actual structs. Cc: stable@vger.kernel.org Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- drivers/misc/eeprom/at24.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index e0b4b36ef010..783244b485cc 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -730,6 +730,16 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_warn(&client->dev, "page_size looks suspicious (no power of 2)!\n"); + /* + * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while + * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4. + * + * Eventually we'll get rid of the magic values altoghether in favor of + * real structs, but for now just manually set the right size. + */ + if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4) + chip.byte_len = 6; + /* Use I2C operations unless we're stuck with SMBus extensions. */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { if (chip.flags & AT24_FLAG_ADDR16) -- cgit v1.2.3 From 01e4fab6c1131a5e4f12104e7134da701def0434 Mon Sep 17 00:00:00 2001 From: Hyong-Youb Kim Date: Wed, 29 Nov 2017 00:03:50 -0500 Subject: myri10ge: Update MAINTAINERS Change the maintainer to Chris Lee who has access to Myricom hardware and can test/review. Update the website URL. Signed-off-by: Hyong-Youb Kim Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..77d819b458a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9331,9 +9331,9 @@ F: drivers/gpu/drm/mxsfb/ F: Documentation/devicetree/bindings/display/mxsfb-drm.txt MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE) -M: Hyong-Youb Kim +M: Chris Lee L: netdev@vger.kernel.org -W: https://www.myricom.com/support/downloads/myri10ge.html +W: https://www.cspi.com/ethernet-products/support/downloads/ S: Supported F: drivers/net/ethernet/myricom/myri10ge/ -- cgit v1.2.3 From 52dd06506e9bbc2a37b352df7dfc5468f8da21fd Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Nov 2017 12:16:03 +0100 Subject: drm/fb_helper: Disable all crtc's when initial setup fails. Some drivers like i915 start with crtc's enabled, but with deferred fbcon setup they were no longer disabled as part of fbdev setup. Headless units could no longer enter pc3 state because the crtc was still enabled. Fix this by calling restore_fbdev_mode when we would have called it otherwise once during initial fbdev setup. Signed-off-by: Maarten Lankhorst Fixes: ca91a2758fce ("drm/fb-helper: Support deferred setup") Cc: # v4.14+ Reported-by: Thomas Voegtle Tested-by: Thomas Voegtle Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171128111603.62757-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/drm_fb_helper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 07374008f146..e56166334455 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1809,6 +1809,10 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, if (crtc_count == 0 || sizes.fb_width == -1 || sizes.fb_height == -1) { DRM_INFO("Cannot find any crtc or sizes\n"); + + /* First time: disable all crtc's.. */ + if (!fb_helper->deferred_setup && !READ_ONCE(fb_helper->dev->master)) + restore_fbdev_mode(fb_helper); return -EAGAIN; } -- cgit v1.2.3 From 3d7682af228fd78dc46bc6bf40e0268ad04521ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Nov 2017 14:25:50 +0000 Subject: rxrpc: Clean up whitespace Clean up some whitespace from rxrpc. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/conn_object.c | 2 +- net/rxrpc/input.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index bda952ffe6a6..555274ddc514 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -426,7 +426,7 @@ recheck_state: next = call->expect_rx_by; #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } - + set(call->expect_req_by); set(call->expect_term_by); set(call->ack_at); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 1aad04a32d5e..c628351eb900 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -424,7 +424,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (earliest != now + MAX_JIFFY_OFFSET) { _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_set_service_reap_timer(rxnet, earliest); + rxrpc_set_service_reap_timer(rxnet, earliest); } while (!list_empty(&graveyard)) { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 23a5e61d8f79..6fc61400337f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -976,7 +976,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, rxrpc_reduce_call_timer(call, expect_rx_by, now, rxrpc_timer_set_for_normal); } - + switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb, skew); @@ -1213,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto reupgrade; conn->service_id = sp->hdr.serviceId; } - + if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); -- cgit v1.2.3 From 5fc62f6a139a7b06b027bf442cd4205619506f59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Nov 2017 14:40:41 +0000 Subject: rxrpc: Fix ACK generation from the connection event processor Repeat terminal ACKs and now terminal ACKs are now generated from the connection event processor rather from call handling as this allows us to discard client call structures as soon as possible and free up the channel for a follow on call. However, in ACKs so generated, the additional information trailer is malformed because the padding that's meant to be in the middle isn't included in what's transmitted. Fix it so that the 3 bytes of padding are included in the transmission. Further, the trailer is misaligned because of the padding, so assigment to the u16 and u32 fields inside it might cause problems on some arches, so fix this by breaking the padding and the trailer out of the packed struct. (This also deals with potential compiler weirdies where some of the nested structs are packed and some aren't). The symptoms can be seen in wireshark as terminal DUPLICATE or IDLE ACK packets in which the Max MTU, Interface MTU and rwind fields have weird values and the Max Packets field is apparently missing. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- net/rxrpc/conn_event.c | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9e9a8db1bc9c..4ca11be6be3c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -30,22 +30,18 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; struct msghdr msg; - struct kvec iov; + struct kvec iov[3]; struct { struct rxrpc_wire_header whdr; union { - struct { - __be32 code; - } abort; - struct { - struct rxrpc_ackpacket ack; - u8 padding[3]; - struct rxrpc_ackinfo info; - }; + __be32 abort_code; + struct rxrpc_ackpacket ack; }; } __attribute__((packed)) pkt; + struct rxrpc_ackinfo ack_info; size_t len; - u32 serial, mtu, call_id; + int ioc; + u32 serial, mtu, call_id, padding; _enter("%d", conn->debug_id); @@ -66,6 +62,13 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, msg.msg_controllen = 0; msg.msg_flags = 0; + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt.whdr); + iov[1].iov_base = &padding; + iov[1].iov_len = 3; + iov[2].iov_base = &ack_info; + iov[2].iov_len = sizeof(ack_info); + pkt.whdr.epoch = htonl(conn->proto.epoch); pkt.whdr.cid = htonl(conn->proto.cid); pkt.whdr.callNumber = htonl(call_id); @@ -80,8 +83,10 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, len = sizeof(pkt.whdr); switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - pkt.abort.code = htonl(chan->last_abort); - len += sizeof(pkt.abort); + pkt.abort_code = htonl(chan->last_abort); + iov[0].iov_len += sizeof(pkt.abort_code); + len += sizeof(pkt.abort_code); + ioc = 1; break; case RXRPC_PACKET_TYPE_ACK: @@ -94,13 +99,19 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; - pkt.info.rxMTU = htonl(rxrpc_rx_mtu); - pkt.info.maxMTU = htonl(mtu); - pkt.info.rwind = htonl(rxrpc_rx_window_size); - pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + ack_info.rxMTU = htonl(rxrpc_rx_mtu); + ack_info.maxMTU = htonl(mtu); + ack_info.rwind = htonl(rxrpc_rx_window_size); + ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max); pkt.whdr.flags |= RXRPC_SLOW_START_OK; - len += sizeof(pkt.ack) + sizeof(pkt.info); + padding = 0; + iov[0].iov_len += sizeof(pkt.ack); + len += sizeof(pkt.ack) + 3 + sizeof(ack_info); + ioc = 3; break; + + default: + return; } /* Resync with __rxrpc_disconnect_call() and check that the last call @@ -110,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, if (READ_ONCE(chan->last_call) != call_id) return; - iov.iov_base = &pkt; - iov.iov_len = len; - serial = atomic_inc_return(&conn->serial); pkt.whdr.serial = htonl(serial); @@ -127,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; } - kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); + kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); _leave(""); return; } -- cgit v1.2.3 From 15bfe05c8d6386f1a90e9340d15336e85e32aad6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Nov 2017 11:01:09 +0100 Subject: net: ethernet: xilinx: Mark XILINX_LL_TEMAC broken on 64-bit On 64-bit (e.g. powerpc64/allmodconfig): drivers/net/ethernet/xilinx/ll_temac_main.c: In function 'temac_start_xmit_done': drivers/net/ethernet/xilinx/ll_temac_main.c:633:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); ^ cdmac_bd.app4 is u32, so it is too small to hold a kernel pointer. Note that several other fields in struct cdmac_bd are also too small to hold physical addresses on 64-bit platforms. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 6d68c8a8f4f2..da4ec575ccf9 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -34,6 +34,7 @@ config XILINX_AXI_EMAC config XILINX_LL_TEMAC tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver" depends on (PPC || MICROBLAZE) + depends on !64BIT || BROKEN select PHYLIB ---help--- This driver supports the Xilinx 10/100/1000 LocalLink TEMAC -- cgit v1.2.3 From 282ef4729195c8503f7101d574acfb5e7c8a8209 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 28 Nov 2017 11:28:52 -0600 Subject: rxrpc: Fix variable overwrite Values assigned to both variable resend_at and ack_at are overwritten before they can be used. The correct fix here is to add 'now' to the previously computed value in resend_at and ack_at. Addresses-Coverity-ID: 1462262 Addresses-Coverity-ID: 1462263 Addresses-Coverity-ID: 1462264 Fixes: beb8e5e4f38c ("rxrpc: Express protocol timeouts in terms of RTT") Link: https://marc.info/?i=17004.1511808959%40warthog.procyon.org.uk Signed-off-by: Gustavo A. R. Silva Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/sendmsg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 555274ddc514..ad2ab1103189 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -123,7 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, else ack_at = expiry; - ack_at = jiffies + expiry; + ack_at += now; if (time_before(ack_at, call->ack_at)) { WRITE_ONCE(call->ack_at, ack_at); rxrpc_reduce_call_timer(call, ack_at, now, diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index a1c53ac066a1..09f2a3e05221 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -233,7 +233,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, if (resend_at < 1) resend_at = 1; - resend_at = now + rxrpc_resend_timeout; + resend_at += now; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, rxrpc_timer_set_for_send); -- cgit v1.2.3 From a7e4fbbfdf7935333800bd801f6affc2515506f2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 29 Nov 2017 14:11:49 +0000 Subject: net: via: via-rhine: use %p to format void * address instead of %x Don't use %x and casting to print out an address, instead use %p and remove the casting. Cleans up smatch warnings: drivers/net/ethernet/via/via-rhine.c:998 rhine_init_one_common() warn: argument 4 to %lx specifier is cast from pointer Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-rhine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 83e6f76eb965..33949248c829 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -995,8 +995,8 @@ static int rhine_init_one_common(struct device *hwdev, u32 quirks, else name = "Rhine III"; - netdev_info(dev, "VIA %s at 0x%lx, %pM, IRQ %d\n", - name, (long)ioaddr, dev->dev_addr, rp->irq); + netdev_info(dev, "VIA %s at %p, %pM, IRQ %d\n", + name, ioaddr, dev->dev_addr, rp->irq); dev_set_drvdata(hwdev, dev); -- cgit v1.2.3 From 644a1f19c6c8393d0c4168a5adf79056da6822eb Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 27 Nov 2017 20:46:22 +0100 Subject: eeprom: at24: fix reading from 24MAC402/24MAC602 Chip datasheet mentions that word addresses other than the actual start position of the MAC delivers undefined results. So fix this. Current implementation doesn't work due to this wrong offset. Cc: stable@vger.kernel.org Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 783244b485cc..8ca6772b3baf 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -425,7 +425,8 @@ static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf, memset(msg, 0, sizeof(msg)); msg[0].addr = client->addr; msg[0].buf = addrbuf; - addrbuf[0] = 0x90 + offset; + /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */ + addrbuf[0] = 0xa0 - at24->chip.byte_len + offset; msg[0].len = 1; msg[1].addr = client->addr; msg[1].flags = I2C_M_RD; -- cgit v1.2.3 From d9bcd462daf34aebb8de9ad7f76de0198bb5a0f0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 24 Nov 2017 07:47:50 +0100 Subject: eeprom: at24: check at24_read/write arguments So far we completely rely on the caller to provide valid arguments. To be on the safe side perform an own sanity check. Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 8ca6772b3baf..305a7a464d09 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -569,6 +569,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return count; + if (off + count > at24->chip.byte_len) + return -EINVAL; + client = at24_translate_offset(at24, &off); ret = pm_runtime_get_sync(&client->dev); @@ -614,6 +617,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return -EINVAL; + if (off + count > at24->chip.byte_len) + return -EINVAL; + client = at24_translate_offset(at24, &off); ret = pm_runtime_get_sync(&client->dev); -- cgit v1.2.3 From f6454f80e8a965fca203dab28723f68ec78db608 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 29 Nov 2017 15:20:00 +0100 Subject: ethernet: dwmac-stm32: Fix copyright Uniformize STMicroelectronics copyrights header Signed-off-by: Benjamin Gaignard CC: Alexandre Torgue Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 61cb24810d10..9e6db16af663 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -1,8 +1,8 @@ /* * dwmac-stm32.c - DWMAC Specific Glue layer for STM32 MCU * - * Copyright (C) Alexandre Torgue 2015 - * Author: Alexandre Torgue + * Copyright (C) STMicroelectronics SA 2017 + * Author: Alexandre Torgue for STMicroelectronics. * License terms: GNU General Public License (GPL), version 2 * */ -- cgit v1.2.3 From 88bc0ede8d35edc969350852894dc864a2dc1859 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 29 Nov 2017 22:34:50 +0900 Subject: quota: Check for register_shrinker() failure. register_shrinker() might return -ENOMEM error since Linux 3.12. Call panic() as with other failure checks in this function if register_shrinker() failed. Fixes: 1d3d4437eae1 ("vmscan: per-node deferred work") Signed-off-by: Tetsuo Handa Cc: Jan Kara Cc: Michal Hocko Reviewed-by: Michal Hocko Signed-off-by: Jan Kara --- fs/quota/dquot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d38684ff2ebb..020c597ef9b6 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2992,7 +2992,8 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - register_shrinker(&dqcache_shrinker); + if (register_shrinker(&dqcache_shrinker)) + panic("Cannot register dquot shrinker"); return 0; } -- cgit v1.2.3 From f55e1014f9e567d830eb3a7f57d879a34872af4b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Nov 2017 09:01:01 -0800 Subject: Revert "mm, thp: Do not make pmd/pud dirty without a reason" This reverts commit 152e93af3cfe2d29d8136cc0a02a8612507136ee. It was a nice cleanup in theory, but as Nicolai Stange points out, we do need to make the page dirty for the copy-on-write case even when we didn't end up making it writable, since the dirty bit is what we use to check that we've gone through a COW cycle. Reported-by: Michal Hocko Acked-by: Kirill A. Shutemov Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 31 ++++++++++++------------------- mm/internal.h | 3 +-- mm/khugepaged.c | 2 +- mm/memory.c | 2 +- mm/migrate.c | 2 +- 5 files changed, 16 insertions(+), 24 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f22401fd83b5..0e7ded98d114 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -474,13 +474,10 @@ out: } __setup("transparent_hugepage=", setup_transparent_hugepage); -pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty) +pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { - if (likely(vma->vm_flags & VM_WRITE)) { + if (likely(vma->vm_flags & VM_WRITE)) pmd = pmd_mkwrite(pmd); - if (dirty) - pmd = pmd_mkdirty(pmd); - } return pmd; } @@ -602,7 +599,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } entry = mk_huge_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr, true); mem_cgroup_commit_charge(page, memcg, false, true); lru_cache_add_active_or_unevictable(page, vma); @@ -744,8 +741,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); if (write) { - entry = pmd_mkyoung(entry); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = pmd_mkyoung(pmd_mkdirty(entry)); + entry = maybe_pmd_mkwrite(entry, vma); } if (pgtable) { @@ -791,14 +788,10 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma, - bool dirty) +static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) { - if (likely(vma->vm_flags & VM_WRITE)) { + if (likely(vma->vm_flags & VM_WRITE)) pud = pud_mkwrite(pud); - if (dirty) - pud = pud_mkdirty(pud); - } return pud; } @@ -814,8 +807,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); if (write) { - entry = pud_mkyoung(entry); - entry = maybe_pud_mkwrite(entry, vma, true); + entry = pud_mkyoung(pud_mkdirty(entry)); + entry = maybe_pud_mkwrite(entry, vma); } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); @@ -1286,7 +1279,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) if (reuse_swap_page(page, NULL)) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); ret |= VM_FAULT_WRITE; @@ -1356,7 +1349,7 @@ alloc: } else { pmd_t entry; entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd); page_add_new_anon_rmap(new_page, vma, haddr, true); mem_cgroup_commit_charge(new_page, memcg, false, true); @@ -2935,7 +2928,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = pmd_mksoft_dirty(pmde); if (is_write_migration_entry(entry)) - pmde = maybe_pmd_mkwrite(pmde, vma, false); + pmde = maybe_pmd_mkwrite(pmde, vma); flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); page_add_anon_rmap(new, vma, mmun_start, true); diff --git a/mm/internal.h b/mm/internal.h index b35cdebda0ce..e6bd35182dae 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -328,8 +328,7 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) } } -extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, - bool dirty); +extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /* * At what user virtual address is page expected in @vma? diff --git a/mm/khugepaged.c b/mm/khugepaged.c index db43dc8a8ae6..ea4ff259b671 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1057,7 +1057,7 @@ static void collapse_huge_page(struct mm_struct *mm, pgtable = pmd_pgtable(_pmd); _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); - _pmd = maybe_pmd_mkwrite(_pmd, vma, false); + _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); /* * spin_lock() below is not the equivalent of smp_wmb(), so diff --git a/mm/memory.c b/mm/memory.c index b10c1d26f675..85e7a87da79f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3335,7 +3335,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) entry = mk_huge_pmd(page, vma->vm_page_prot); if (write) - entry = maybe_pmd_mkwrite(entry, vma, true); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); page_add_file_rmap(page, true); diff --git a/mm/migrate.c b/mm/migrate.c index 57865fc8cfe3..4d0be47a322a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2068,7 +2068,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, } entry = mk_huge_pmd(new_page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(entry, vma, false); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); /* * Clear the old entry under pagetable lock and establish the new PTE. -- cgit v1.2.3 From 668533dc0764b30c9dd2baf3ca800156f688326b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Nov 2017 10:30:13 -0800 Subject: kallsyms: take advantage of the new '%px' format The conditional kallsym hex printing used a special fixed-width '%lx' output (KALLSYM_FMT) in preparation for the hashing of %p, but that series ended up adding a %px specifier to help with the conversions. Use it, and avoid the "print pointer as an unsigned long" code. Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 6 ------ kernel/kallsyms.c | 8 ++++---- kernel/module.c | 6 +++--- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 708f337d780b..bd118a6c60cb 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,12 +14,6 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) -#ifndef CONFIG_64BIT -# define KALLSYM_FMT "%08lx" -#else -# define KALLSYM_FMT "%016lx" -#endif - struct module; #ifdef CONFIG_KALLSYMS diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 531ffa984bc2..d5fa4116688a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -614,14 +614,14 @@ static void s_stop(struct seq_file *m, void *p) static int s_show(struct seq_file *m, void *p) { - unsigned long value; + void *value; struct kallsym_iter *iter = m->private; /* Some debugging symbols have no name. Ignore them. */ if (!iter->name[0]) return 0; - value = iter->show_value ? iter->value : 0; + value = iter->show_value ? (void *)iter->value : NULL; if (iter->module_name[0]) { char type; @@ -632,10 +632,10 @@ static int s_show(struct seq_file *m, void *p) */ type = iter->exported ? toupper(iter->type) : tolower(iter->type); - seq_printf(m, KALLSYM_FMT " %c %s\t[%s]\n", value, + seq_printf(m, "%px %c %s\t[%s]\n", value, type, iter->name, iter->module_name); } else - seq_printf(m, KALLSYM_FMT " %c %s\n", value, + seq_printf(m, "%px %c %s\n", value, iter->type, iter->name); return 0; } diff --git a/kernel/module.c b/kernel/module.c index f0411a271765..dea01ac9cb74 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4157,7 +4157,7 @@ static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; - unsigned long value; + void *value; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) @@ -4173,8 +4173,8 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - value = m->private ? 0 : (unsigned long)mod->core_layout.base; - seq_printf(m, " 0x" KALLSYM_FMT, value); + value = m->private ? NULL : mod->core_layout.base; + seq_printf(m, " 0x%px", value); /* Taints info */ if (mod->taints) -- cgit v1.2.3 From 445f288d706cb5a418b13c340280b47d4585d667 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 18 Nov 2017 13:50:11 -0500 Subject: NFSv4: Ensure gcc 4.4.4 can compile initialiser for "invalid_stateid" gcc 4.4.4 is too old to have full C11 anonymous union support, so the current initialiser fails to compile. Reported-by: Boris Ostrovsky Signed-off-by: Trond Myklebust (compile-)Tested-by: Boris Ostrovsky Reviewed-by: Geert Uytterhoeven Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 54fd56d715a8..e4f4a09ed9f4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -71,8 +71,8 @@ const nfs4_stateid zero_stateid = { }; const nfs4_stateid invalid_stateid = { { - .seqid = cpu_to_be32(0xffffffffU), - .other = { 0 }, + /* Funky initialiser keeps older gcc versions happy */ + .data = { 0xff, 0xff, 0xff, 0xff, 0 }, }, .type = NFS4_INVALID_STATEID_TYPE, }; -- cgit v1.2.3 From 4ba161a793d5f43757c35feff258d9f20a082940 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Nov 2017 12:00:24 -0500 Subject: SUNRPC: Allow connect to return EHOSTUNREACH Reported-by: Dmitry Vyukov Signed-off-by: Trond Myklebust Tested-by: Dmitry Vyukov Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 684e356b40e4..c2b2d489b57b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2440,6 +2440,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* -- cgit v1.2.3 From ef0010a30935de4e0211cbc7bdffc30446cdee9b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Nov 2017 11:28:09 -0800 Subject: vsprintf: don't use 'restricted_pointer()' when not restricting Instead, just fall back on the new '%p' behavior which hashes the pointer. Otherwise, '%pK' - that was intended to mark a pointer as restricted - just ends up leaking pointers that a normal '%p' wouldn't leak. Which just make the whole thing pointless. I suspect we should actually get rid of '%pK' entirely, and make it just work as '%p' regardless, but this is the minimal obvious fix. People who actually use 'kptr_restrict' should weigh in on which behavior they want. Cc: Tobin Harding Cc: Kees Cook Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d960aead0336..01c3957b2de6 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1931,6 +1931,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': + if (!kptr_restrict) + break; return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, fmt); -- cgit v1.2.3 From 1569d651f152870663fabd8f1c80af353f967ad5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 27 Nov 2017 13:12:35 +0100 Subject: drm/ttm: fix populate_and_map() functions once more MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts "drm/ttm: Fix configuration error around populate_and_map() functions". This fix has gone into the wrong direction. Those helpers should be available even when neither CONFIG_INTEL_IOMMU nor CONFIG_SWIOTLB are set. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 -- include/drm/ttm/ttm_page_alloc.h | 32 ++++++++++---------------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 316f831ad5f0..6d48ccca0b38 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -1058,7 +1058,6 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm) } EXPORT_SYMBOL(ttm_pool_unpopulate); -#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) { unsigned i, j; @@ -1129,7 +1128,6 @@ void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) ttm_pool_unpopulate(&tt->ttm); } EXPORT_SYMBOL(ttm_unmap_and_unpopulate_pages); -#endif int ttm_page_alloc_debugfs(struct seq_file *m, void *data) { diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 38a2b4770c35..593811362a91 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -58,12 +58,21 @@ int ttm_pool_populate(struct ttm_tt *ttm); */ void ttm_pool_unpopulate(struct ttm_tt *ttm); +/** + * Populates and DMA maps pages to fullfil a ttm_dma_populate() request + */ +int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); + +/** + * Unpopulates and DMA unmaps pages as part of a + * ttm_dma_unpopulate() request */ +void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); + /** * Output the state of pools to debugfs file */ int ttm_page_alloc_debugfs(struct seq_file *m, void *data); - #if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) /** * Initialize pool allocator. @@ -83,17 +92,6 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); - -/** - * Populates and DMA maps pages to fullfil a ttm_dma_populate() request - */ -int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); - -/** - * Unpopulates and DMA unmaps pages as part of a - * ttm_dma_unpopulate() request */ -void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); - #else static inline int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) @@ -116,16 +114,6 @@ static inline void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) { } - -static inline int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) -{ - return -ENOMEM; -} - -static inline void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) -{ -} - #endif #endif -- cgit v1.2.3 From 7fdf165a52505392eb059902b0df55e79a45c25f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 29 Nov 2017 17:26:56 +0100 Subject: drm/radeon: remove init of CIK VMIDs 8-16 for amdkfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMIDs 8-16 in Kaveri were reserved for use by the amdkfd driver. Because we removed amdkfd support from radeon, those VMIDs are now used by radeon and are initialized by radeon. This patch removes the function that initialized those VMIDs for amdkfd use. This initialization overridden the radeon initialization and caused GPU faults and GUI crashed. Fixes: f4fa88ab28ab ("drm/radeon: deprecate and remove KFD interface") Rported-by: Michel Dänzer Acked-by: Christian König Reviewed-and-Tested-by: Michel Dänzer Signed-off-by: Oded Gabbay Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 898f9a078830..a6511918f632 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5451,28 +5451,6 @@ void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) WREG32(VM_INVALIDATE_REQUEST, 0x1); } -static void cik_pcie_init_compute_vmid(struct radeon_device *rdev) -{ - int i; - uint32_t sh_mem_bases, sh_mem_config; - - sh_mem_bases = 0x6000 | 0x6000 << 16; - sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED); - sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED); - - mutex_lock(&rdev->srbm_mutex); - for (i = 8; i < 16; i++) { - cik_srbm_select(rdev, 0, 0, 0, i); - /* CP and shaders */ - WREG32(SH_MEM_CONFIG, sh_mem_config); - WREG32(SH_MEM_APE1_BASE, 1); - WREG32(SH_MEM_APE1_LIMIT, 0); - WREG32(SH_MEM_BASES, sh_mem_bases); - } - cik_srbm_select(rdev, 0, 0, 0, 0); - mutex_unlock(&rdev->srbm_mutex); -} - /** * cik_pcie_gart_enable - gart enable * @@ -5586,8 +5564,6 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) cik_srbm_select(rdev, 0, 0, 0, 0); mutex_unlock(&rdev->srbm_mutex); - cik_pcie_init_compute_vmid(rdev); - cik_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(rdev->mc.gtt_size >> 20), -- cgit v1.2.3 From e5372cd5ef12b05ae74d608f95dc53fe06558867 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 29 Nov 2017 15:09:29 -0500 Subject: sparc64: Fix boot on T4 and later. If we don't put the NG4fls.o object into the same part of the link as the generic sparc64 objects for fls() and __fls() then the relocation in the branch we use for patching will not fit. Move NG4fls.o into lib-y to fix this problem. Fixes: 46ad8d2d22c1 ("sparc64: Use sparc optimized fls and __fls for T4 and above") Signed-off-by: David S. Miller Reported-by: Anatoly Pugachev Tested-by: Anatoly Pugachev --- arch/sparc/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 0f0f76b4f6cd..063556fe2cb1 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -19,7 +19,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += fls64.o -obj-$(CONFIG_SPARC64) += NG4fls.o +lib-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o -- cgit v1.2.3 From 9b85c2d4508563f4bb1de0d971ed02fea0d0d757 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:28 -0800 Subject: tools: bpftool: fix crash on bad parameters with JSON If bad or unrecognised parameters are specified after JSON output is requested, `usage()` will try to output null JSON object before the writer is created. To prevent this, create the writer as soon as the `--json` option is parsed. Fixes: 004b45c0e51a ("tools: bpftool: provide JSON output for all possible commands") Reported-by: Jakub Kicinski Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d6e4762170a4..14ad54a1c404 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -291,7 +291,15 @@ int main(int argc, char **argv) pretty_output = true; /* fall through */ case 'j': - json_output = true; + if (!json_output) { + json_wtr = jsonw_new(stdout); + if (!json_wtr) { + p_err("failed to create JSON writer"); + return -1; + } + json_output = true; + } + jsonw_pretty(json_wtr, pretty_output); break; case 'f': show_pinned = true; @@ -306,15 +314,6 @@ int main(int argc, char **argv) if (argc < 0) usage(); - if (json_output) { - json_wtr = jsonw_new(stdout); - if (!json_wtr) { - p_err("failed to create JSON writer"); - return -1; - } - jsonw_pretty(json_wtr, pretty_output); - } - bfd_init(); ret = cmd_select(cmds, argc, argv, do_help); -- cgit v1.2.3 From 7868620a3c15dbc661fb5d849de403ac04624c50 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:29 -0800 Subject: tools: bpftool: clean up the JSON writer before exiting in usage() The writer is cleaned at the end of the main function, but not if the program exits sooner in usage(). Let's keep it clean and destroy the writer before exiting. Destruction and actual call to exit() are moved to another function so that clean exit can also be performed without printing usage() hints. Fixes: d35efba99d92 ("tools: bpftool: introduce --json and --pretty options") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 10 +++++++++- tools/bpf/bpftool/main.h | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 14ad54a1c404..d72dd73a4016 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -58,11 +58,19 @@ bool show_pinned; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; +static void __noreturn clean_and_exit(int i) +{ + if (json_output) + jsonw_destroy(&json_wtr); + + exit(i); +} + void usage(void) { last_do_help(last_argc - 1, last_argv + 1); - exit(-1); + clean_and_exit(-1); } static int do_help(int argc, char **argv) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9c191e222d6f..0b60ddfb2b93 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -80,7 +81,7 @@ void p_info(const char *fmt, ...); bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); -void usage(void) __attribute__((noreturn)); +void usage(void) __noreturn; struct pinned_obj_table { DECLARE_HASHTABLE(table, 16); -- cgit v1.2.3 From 146882a37da7aa566c7ec088b42c6495d769f2ba Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:30 -0800 Subject: tools: bpftool: make error message from getopt_long() JSON-friendly If `getopt_long()` meets an unknown option, it prints its own error message to standard error output. While this does not strictly break JSON output, it is the only case bpftool prints something to standard error output if JSON output is required. All other errors are printed on standard output as JSON objects, so that an external program does not have to parse stderr. This is changed by setting the global variable `opterr` to 0. Furthermore, p_err() is used to reproduce the error message in a more JSON-friendly way, so that users still get to know what the erroneous option is. Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d72dd73a4016..d294bc8168be 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -288,6 +288,7 @@ int main(int argc, char **argv) hash_init(prog_table.table); hash_init(map_table.table); + opterr = 0; while ((opt = getopt_long(argc, argv, "Vhpjf", options, NULL)) >= 0) { switch (opt) { @@ -313,7 +314,11 @@ int main(int argc, char **argv) show_pinned = true; break; default: - usage(); + p_err("unrecognized option '%s'", argv[optind - 1]); + if (json_output) + clean_and_exit(-1); + else + usage(); } } -- cgit v1.2.3 From 0d954eeb99eee63964a07a68fcac61e1df8488e7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:31 -0800 Subject: tools: bpftool: remove spurious line break from error message The end-of-line character inside the string would break JSON compliance. Remove it, `p_err()` already adds a '\n' character for plain output anyway. Fixes: 9a5ab8bf1d6d ("tools: bpftool: turn err() and info() macros into functions") Reported-by: Jakub Kicinski Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 0b60ddfb2b93..bff330b49791 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -51,7 +51,7 @@ #define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) -#define BAD_ARG() ({ p_err("what is '%s'?\n", *argv); -1; }) +#define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; }) #define ERR_MAX_LEN 1024 -- cgit v1.2.3 From 507e590da398a0e3438d563b5e736c3f2a7749d7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:32 -0800 Subject: tools: bpftool: unify installation directories Programs and documentation not managed by package manager are generally installed under /usr/local/, instead of the user's home directory. In particular, `man` is generally able to find manual pages under `/usr/local/share/man`. bpftool generally follows perf's example, and perf installs to home directory. However bpftool requires root credentials, so it seems sensible to follow the more common convention of installing files under /usr/local instead. So, make /usr/local the default prefix for installing the binary with `make install`, and the documentation with `make doc-install`. Also, create /usr/local/sbin if it does not exist. Note that the bash-completion file, however, is still installed under /usr/share/bash-completion/completions, as the default setup for bash does not attempt to load completion files under /usr/local/. Reported-by: David Beckett Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/Makefile | 2 +- tools/bpf/bpftool/Makefile | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index bde77d7c4390..37292bb5ce60 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -6,7 +6,7 @@ RM ?= rm -f # Make the path relative to DESTDIR, not prefix ifndef DESTDIR -prefix?=$(HOME) +prefix ?= /usr/local endif mandir ?= $(prefix)/share/man man8dir = $(mandir)/man8 diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 813826c50936..c5b21f2cbca5 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -45,8 +45,8 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null -prefix = /usr -bash_compdir ?= $(prefix)/share/bash-completion/completions +prefix = /usr/local +bash_compdir ?= /usr/share/bash-completion/completions CC = gcc @@ -76,6 +76,7 @@ clean: $(LIBBPF)-clean $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d install: + install -m 0755 -d $(prefix)/sbin install $(OUTPUT)bpftool $(prefix)/sbin/bpftool install -m 0755 -d $(bash_compdir) install -m 0644 bash-completion/bpftool $(bash_compdir) -- cgit v1.2.3 From ad3cda064402b69148faf5f7cd1ac8c2eee52645 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:33 -0800 Subject: tools: bpftool: declare phony targets as such In the Makefile, targets install, doc and doc-install should be added to .PHONY. Let's fix this. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c5b21f2cbca5..ec3052c0b004 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -89,5 +89,5 @@ doc-install: FORCE: -.PHONY: all clean FORCE +.PHONY: all clean FORCE install doc doc-install .DEFAULT_GOAL := all -- cgit v1.2.3 From 23721a755f98ac846897a013c92cccb281c1bcc8 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Thu, 30 Nov 2017 09:41:29 +0800 Subject: trace/xdp: fix compile warning: 'struct bpf_map' declared inside parameter list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We meet this compile warning, which caused by missing bpf.h in xdp.h. In file included from ./include/trace/events/xdp.h:10:0, from ./include/linux/bpf_trace.h:6, from drivers/net/ethernet/intel/i40e/i40e_txrx.c:29: ./include/trace/events/xdp.h:93:17: warning: ‘struct bpf_map’ declared inside parameter list will not be visible outside of this definition or declaration const struct bpf_map *map, u32 map_index), ^ ./include/linux/tracepoint.h:187:34: note: in definition of macro ‘__DECLARE_TRACE’ static inline void trace_##name(proto) \ ^~~~~ ./include/linux/tracepoint.h:352:24: note: in expansion of macro ‘PARAMS’ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ ^~~~~~ ./include/linux/tracepoint.h:477:2: note: in expansion of macro ‘DECLARE_TRACE’ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^~~~~~~~~~~~~ ./include/linux/tracepoint.h:477:22: note: in expansion of macro ‘PARAMS’ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^~~~~~ ./include/trace/events/xdp.h:89:1: note: in expansion of macro ‘DEFINE_EVENT’ DEFINE_EVENT(xdp_redirect_template, xdp_redirect, ^~~~~~~~~~~~ ./include/trace/events/xdp.h:90:2: note: in expansion of macro ‘TP_PROTO’ TP_PROTO(const struct net_device *dev, ^~~~~~~~ ./include/trace/events/xdp.h:93:17: warning: ‘struct bpf_map’ declared inside parameter list will not be visible outside of this definition or declaration const struct bpf_map *map, u32 map_index), ^ ./include/linux/tracepoint.h:203:38: note: in definition of macro ‘__DECLARE_TRACE’ register_trace_##name(void (*probe)(data_proto), void *data) \ ^~~~~~~~~~ ./include/linux/tracepoint.h:354:4: note: in expansion of macro ‘PARAMS’ PARAMS(void *__data, proto), \ ^~~~~~ Reported-by: Huang Daode Cc: Hanjun Guo Fixes: 8d3b778ff544 ("xdp: tracepoint xdp_redirect also need a map argument") Signed-off-by: Xie XiuQi Acked-by: Jesper Dangaard Brouer Acked-by: Steven Rostedt (VMware) Signed-off-by: Daniel Borkmann --- include/trace/events/xdp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 4cd0f05d0113..8989a92c571a 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -8,6 +8,7 @@ #include #include #include +#include #define __XDP_ACT_MAP(FN) \ FN(ABORTED) \ -- cgit v1.2.3 From 4b81cb2ff69c8a8e297a147d2eb4d9b5e8d7c435 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 29 Nov 2017 16:09:54 -0800 Subject: mm, memory_hotplug: do not back off draining pcp free pages from kworker context drain_all_pages backs off when called from a kworker context since commit 0ccce3b92421 ("mm, page_alloc: drain per-cpu pages from workqueue context") because the original IPI based pcp draining has been replaced by a WQ based one and the check wanted to prevent from recursion and inter workers dependencies. This has made some sense at the time because the system WQ has been used and one worker holding the lock could be blocked while waiting for new workers to emerge which can be a problem under OOM conditions. Since then commit ce612879ddc7 ("mm: move pcp and lru-pcp draining into single wq") has moved draining to a dedicated (mm_percpu_wq) WQ with a rescuer so we shouldn't depend on any other WQ activity to make a forward progress so calling drain_all_pages from a worker context is safe as long as this doesn't happen from mm_percpu_wq itself which is not the case because all workers are required to _not_ depend on any MM locks. Why is this a problem in the first place? ACPI driven memory hot-remove (acpi_device_hotplug) is executed from the worker context. We end up calling __offline_pages to free all the pages and that requires both lru_add_drain_all_cpuslocked and drain_all_pages to do their job otherwise we can have dangling pages on pcp lists and fail the offline operation (__test_page_isolated_in_pageblock would see a page with 0 ref count but without PageBuddy set). Fix the issue by removing the worker check in drain_all_pages. lru_add_drain_all_cpuslocked doesn't have this restriction so it works as expected. Link: http://lkml.kernel.org/r/20170828093341.26341-1-mhocko@kernel.org Fixes: 0ccce3b924212 ("mm, page_alloc: drain per-cpu pages from workqueue context") Signed-off-by: Michal Hocko Cc: Mel Gorman Cc: Tejun Heo Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d4096f4a5c1f..145f5181e4cd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2507,10 +2507,6 @@ void drain_all_pages(struct zone *zone) if (WARN_ON_ONCE(!mm_percpu_wq)) return; - /* Workqueues cannot recurse */ - if (current->flags & PF_WQ_WORKER) - return; - /* * Do not drain if one is already in progress unless it's specific to * a zone. Such callers are primarily CMA and memory hotplug and need -- cgit v1.2.3 From 687cb0884a714ff484d038e9190edc874edcf146 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 29 Nov 2017 16:09:58 -0800 Subject: mm, oom_reaper: gather each vma to prevent leaking TLB entry tlb_gather_mmu(&tlb, mm, 0, -1) means gathering the whole virtual memory space. In this case, tlb->fullmm is true. Some archs like arm64 doesn't flush TLB when tlb->fullmm is true: commit 5a7862e83000 ("arm64: tlbflush: avoid flushing when fullmm == 1"). Which causes leaking of tlb entries. Will clarifies his patch: "Basically, we tag each address space with an ASID (PCID on x86) which is resident in the TLB. This means we can elide TLB invalidation when pulling down a full mm because we won't ever assign that ASID to another mm without doing TLB invalidation elsewhere (which actually just nukes the whole TLB). I think that means that we could potentially not fault on a kernel uaccess, because we could hit in the TLB" There could be a window between complete_signal() sending IPI to other cores and all threads sharing this mm are really kicked off from cores. In this window, the oom reaper may calls tlb_flush_mmu_tlbonly() to flush TLB then frees pages. However, due to the above problem, the TLB entries are not really flushed on arm64. Other threads are possible to access these pages through TLB entries. Moreover, a copy_to_user() can also write to these pages without generating page fault, causes use-after-free bugs. This patch gathers each vma instead of gathering full vm space. In this case tlb->fullmm is not true. The behavior of oom reaper become similar to munmapping before do_exit, which should be safe for all archs. Link: http://lkml.kernel.org/r/20171107095453.179940-1-wangnan0@huawei.com Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Wang Nan Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Minchan Kim Cc: Will Deacon Cc: Bob Liu Cc: Ingo Molnar Cc: Roman Gushchin Cc: Konstantin Khlebnikov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c86fbd1b590e..c957be32b27a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -550,7 +550,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) */ set_bit(MMF_UNSTABLE, &mm->flags); - tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (!can_madv_dontneed_vma(vma)) continue; @@ -565,11 +564,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { + tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end); unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, NULL); + tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); + } } - tlb_finish_mmu(&tlb, 0, -1); pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), -- cgit v1.2.3 From 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 29 Nov 2017 16:10:01 -0800 Subject: mm/cma: fix alloc_contig_range ret code/potential leak If the call __alloc_contig_migrate_range() in alloc_contig_range returns -EBUSY, processing continues so that test_pages_isolated() is called where there is a tracepoint to identify the busy pages. However, it is possible for busy pages to become available between the calls to these two routines. In this case, the range of pages may be allocated. Unfortunately, the original return code (ret == -EBUSY) is still set and returned to the caller. Therefore, the caller believes the pages were not allocated and they are leaked. Update the comment to indicate that allocation is still possible even if __alloc_contig_migrate_range returns -EBUSY. Also, clear return code in this case so that it is not accidentally used or returned to caller. Link: http://lkml.kernel.org/r/20171122185214.25285-1-mike.kravetz@oracle.com Fixes: 8ef5849fa8a2 ("mm/cma: always check which page caused allocation failure") Signed-off-by: Mike Kravetz Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Joonsoo Kim Cc: Michal Nazarewicz Cc: Laura Abbott Cc: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 145f5181e4cd..73f5d4556b3d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7652,11 +7652,18 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* * In case of -EBUSY, we'd like to know which page causes problem. - * So, just fall through. We will check it in test_pages_isolated(). + * So, just fall through. test_pages_isolated() has a tracepoint + * which will report the busy page. + * + * It is possible that busy pages could become available before + * the call to test_pages_isolated, and the range will actually be + * allocated. So, if we fall through be sure to clear ret so that + * -EBUSY is not accidentally used or returned to caller. */ ret = __alloc_contig_migrate_range(&cc, start, end); if (ret && ret != -EBUSY) goto done; + ret =0; /* * Pages from [start, end) are within a MAX_ORDER_NR_PAGES -- cgit v1.2.3 From 1501899a898dfb5477c55534bdfd734c046da06d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:06 -0800 Subject: mm: fix device-dax pud write-faults triggered by get_user_pages() Currently only get_user_pages_fast() can safely handle the writable gup case due to its use of pud_access_permitted() to check whether the pud entry is writable. In the gup slow path pud_write() is used instead of pud_access_permitted() and to date it has been unimplemented, just calls BUG_ON(). kernel BUG at ./include/linux/hugetlb.h:244! [..] RIP: 0010:follow_devmap_pud+0x482/0x490 [..] Call Trace: follow_page_mask+0x28c/0x6e0 __get_user_pages+0xe4/0x6c0 get_user_pages_unlocked+0x130/0x1b0 get_user_pages_fast+0x89/0xb0 iov_iter_get_pages_alloc+0x114/0x4a0 nfs_direct_read_schedule_iovec+0xd2/0x350 ? nfs_start_io_direct+0x63/0x70 nfs_file_direct_read+0x1e0/0x250 nfs_file_read+0x90/0xc0 For now this just implements a simple check for the _PAGE_RW bit similar to pmd_write. However, this implies that the gup-slow-path check is missing the extra checks that the gup-fast-path performs with pud_access_permitted. Later patches will align all checks to use the 'access_permitted' helper if the architecture provides it. Note that the generic 'access_permitted' helper fallback is the simple _PAGE_RW check on architectures that do not define the 'access_permitted' helper(s). [dan.j.williams@intel.com: fix powerpc compile error] Link: http://lkml.kernel.org/r/151129126165.37405.16031785266675461397.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151043109938.2842.14834662818213616199.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Dan Williams Reported-by: Stephen Rothwell Acked-by: Thomas Gleixner [x86] Cc: Kirill A. Shutemov Cc: Catalin Marinas Cc: "David S. Miller" Cc: Dave Hansen Cc: Will Deacon Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 6 ++++++ include/asm-generic/pgtable.h | 8 ++++++++ include/linux/hugetlb.h | 8 -------- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 09f9e1e00e3b..dcce76ee4aa7 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; +} + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 757dc6ffc7ba..1ac457511f4e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -814,6 +814,14 @@ static inline int pmd_write(pmd_t pmd) #endif /* __HAVE_ARCH_PMD_WRITE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifndef pud_write +static inline int pud_write(pud_t pud) +{ + BUG(); + return 0; +} +#endif /* pud_write */ + #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fbf5b31d47ee..82a25880714a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd) } #endif -#ifndef pud_write -static inline int pud_write(pud_t pud) -{ - BUG(); - return 0; -} -#endif - #define HUGETLB_ANON_FILE "anon_hugepage" enum { -- cgit v1.2.3 From e4e40e0263ea6a3bfefbfd15d1b6ff5c03f2b95e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:10 -0800 Subject: mm: switch to 'define pmd_write' instead of __HAVE_ARCH_PMD_WRITE In response to compile breakage introduced by a series that added the pud_write helper to x86, Stephen notes: did you consider using the other paradigm: In arch include files: #define pud_write pud_write static inline int pud_write(pud_t pud) ..... Then in include/asm-generic/pgtable.h: #ifndef pud_write tatic inline int pud_write(pud_t pud) { .... } #endif If you had, then the powerpc code would have worked ... ;-) and many of the other interfaces in include/asm-generic/pgtable.h are protected that way ... Given that some architecture already define pmd_write() as a macro, it's a net reduction to drop the definition of __HAVE_ARCH_PMD_WRITE. Link: http://lkml.kernel.org/r/151129126721.37405.13339850900081557813.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Suggested-by: Stephen Rothwell Cc: Benjamin Herrenschmidt Cc: "Aneesh Kumar K.V" Cc: Oliver OHalloran Cc: Chris Metcalf Cc: Russell King Cc: Ralf Baechle Cc: "H. Peter Anvin" Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgtable-3level.h | 1 - arch/arm64/include/asm/pgtable.h | 1 - arch/mips/include/asm/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 - arch/s390/include/asm/pgtable.h | 2 +- arch/sparc/include/asm/pgtable_64.h | 2 +- arch/tile/include/asm/pgtable.h | 1 - arch/x86/include/asm/pgtable.h | 2 +- include/asm-generic/pgtable.h | 4 ++-- 9 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 2a029bceaf2f..1a7a17b2a1ba 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte) } #define __HAVE_ARCH_PTE_SPECIAL -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pud_page(pud) pmd_page(__pmd(pud_val(pud))) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9530b5b5ca8..149d05fb9421 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 9e9e94415d08..1a508a74d48d 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_WRITE); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9a677cd5997f..44697817ccc6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd) } #endif /* CONFIG_NUMA_BALANCING */ -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d7fe9838084d..0a6b0286c32e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 5a9e96be1665..9937c5ff94a9 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return pte_pfn(pte); } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 2a26cc4fefc2..adfa21b18488 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd)) #define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) -#define __HAVE_ARCH_PMD_WRITE #define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot))) #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index dcce76ee4aa7..95e2dfd75521 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_RW; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 1ac457511f4e..b234d54f2cb6 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -805,13 +805,13 @@ static inline int pmd_trans_huge(pmd_t pmd) { return 0; } -#ifndef __HAVE_ARCH_PMD_WRITE +#ifndef pmd_write static inline int pmd_write(pmd_t pmd) { BUG(); return 0; } -#endif /* __HAVE_ARCH_PMD_WRITE */ +#endif /* pmd_write */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifndef pud_write -- cgit v1.2.3 From e7fe7b5cae90cf85bb6fed5ec5d4c5cf311a4fe9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:14 -0800 Subject: mm: replace pud_write with pud_access_permitted in fault + gup paths The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pud_write is must be referencing user-memory. [dan.j.williams@intel.com: fix powerpc compile error] Link: http://lkml.kernel.org/r/151129127237.37405.16073414520854722485.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151043110453.2842.2166049702068628177.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: "David S. Miller" Cc: Kirill A. Shutemov Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 6 ++++++ arch/sparc/mm/gup.c | 2 +- mm/huge_memory.c | 2 +- mm/memory.c | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0a6b0286c32e..57d7bc92e0b8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1264,6 +1264,12 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline pud_t pud_mkclean(pud_t pud) { if (pud_large(pud)) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 5335ba3c850e..5ae2d0a01a70 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, if (!(pud_val(pud) & _PAGE_VALID)) return 0; - if (write && !pud_write(pud)) + if (!pud_access_permitted(pud, write)) return 0; refs = 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f22401fd83b5..4fe2491056a0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1019,7 +1019,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pud_lockptr(mm, pud)); - if (flags & FOLL_WRITE && !pud_write(*pud)) + if (!pud_access_permitted(*pud, flags & FOLL_WRITE)) return NULL; if (pud_present(*pud) && pud_devmap(*pud)) diff --git a/mm/memory.c b/mm/memory.c index b10c1d26f675..25d283d46ea3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4013,7 +4013,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* NUMA case for anonymous PUDs would go here */ - if (dirty && !pud_write(orig_pud)) { + if (dirty && !pud_access_permitted(orig_pud, WRITE)) { ret = wp_huge_pud(&vmf, orig_pud); if (!(ret & VM_FAULT_FALLBACK)) return ret; -- cgit v1.2.3 From c7da82b894e9eef60a04a15f065a8502341bf13b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:18 -0800 Subject: mm: replace pmd_write with pmd_access_permitted in fault + gup paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pmd_write is must be referencing user-memory. Link: http://lkml.kernel.org/r/151043111049.2842.15241454964150083466.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: "Jérôme Glisse" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/mm/gup.c | 2 +- fs/dax.c | 3 ++- mm/hmm.c | 4 ++-- mm/huge_memory.c | 4 ++-- mm/memory.c | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 5ae2d0a01a70..33c0f8bb0f33 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, if (!(pmd_val(pmd) & _PAGE_VALID)) return 0; - if (write && !pmd_write(pmd)) + if (!pmd_access_permitted(pmd, write)) return 0; refs = 0; diff --git a/fs/dax.c b/fs/dax.c index 95981591977a..78b72c48374e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, if (pfn != pmd_pfn(*pmdp)) goto unlock_pmd; - if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp)) + if (!pmd_dirty(*pmdp) + && !pmd_access_permitted(*pmdp, WRITE)) goto unlock_pmd; flush_cache_page(vma, address, pfn); diff --git a/mm/hmm.c b/mm/hmm.c index ea19742a5d60..93718a391611 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -391,11 +391,11 @@ again: if (pmd_protnone(pmd)) return hmm_vma_walk_clear(start, end, walk); - if (write_fault && !pmd_write(pmd)) + if (!pmd_access_permitted(pmd, write_fault)) return hmm_vma_walk_clear(start, end, walk); pfn = pmd_pfn(pmd) + pte_index(addr); - flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0; + flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0; for (; addr < end; addr += PAGE_SIZE, i++, pfn++) pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag; return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4fe2491056a0..05b729f45e8a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -877,7 +877,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, */ WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); - if (flags & FOLL_WRITE && !pmd_write(*pmd)) + if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE)) return NULL; if (pmd_present(*pmd) && pmd_devmap(*pmd)) @@ -1393,7 +1393,7 @@ out_unlock: */ static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) { - return pmd_write(pmd) || + return pmd_access_permitted(pmd, WRITE) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); } diff --git a/mm/memory.c b/mm/memory.c index 25d283d46ea3..416e451a707e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4046,7 +4046,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf, orig_pmd); - if (dirty && !pmd_write(orig_pmd)) { + if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) { ret = wp_huge_pmd(&vmf, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; -- cgit v1.2.3 From 5c9d2d5c269c498aa9a546e8d2158a3e4142a1a2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:21 -0800 Subject: mm: replace pte_write with pte_access_permitted in fault + gup paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pte_write is must be referencing user-memory. Link: http://lkml.kernel.org/r/151043111604.2842.8051684481794973100.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: "Jérôme Glisse" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 2 +- mm/hmm.c | 4 ++-- mm/memory.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index dfcde13f289a..85cc822fd403 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, */ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) { - return pte_write(pte) || + return pte_access_permitted(pte, WRITE) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); } diff --git a/mm/hmm.c b/mm/hmm.c index 93718a391611..3a5c172af560 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -456,11 +456,11 @@ again: continue; } - if (write_fault && !pte_write(pte)) + if (!pte_access_permitted(pte, write_fault)) goto fault; pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag; - pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0; + pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0; continue; fault: diff --git a/mm/memory.c b/mm/memory.c index 416e451a707e..4f07acd1695f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3948,7 +3948,7 @@ static int handle_pte_fault(struct vm_fault *vmf) if (unlikely(!pte_same(*vmf->pte, entry))) goto unlock; if (vmf->flags & FAULT_FLAG_WRITE) { - if (!pte_write(entry)) + if (!pte_access_permitted(entry, WRITE)) return do_wp_page(vmf); entry = pte_mkdirty(entry); } @@ -4336,7 +4336,7 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = *ptep; - if ((flags & FOLL_WRITE) && !pte_write(pte)) + if (!pte_access_permitted(pte, flags & FOLL_WRITE)) goto unlock; *prot = pgprot_val(pte_pgprot(pte)); -- cgit v1.2.3 From 95a87982541932503d3f59aba4c30b0bde0a6294 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Wed, 29 Nov 2017 16:10:25 -0800 Subject: scripts/faddr2line: extend usage on generic arch When cross-compiling, fadd2line should use the binary tool used for the target system, rather than that of the host. Link: http://lkml.kernel.org/r/20171121092911.GA150711@sofia Signed-off-by: Liu Changcheng Cc: Kate Stewart Cc: NeilBrown Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/faddr2line | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 1f5ce959f596..39e07d8574dd 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,9 +44,16 @@ set -o errexit set -o nounset +READELF="${CROSS_COMPILE}readelf" +ADDR2LINE="${CROSS_COMPILE}addr2line" +SIZE="${CROSS_COMPILE}size" +NM="${CROSS_COMPILE}nm" + command -v awk >/dev/null 2>&1 || die "awk isn't installed" -command -v readelf >/dev/null 2>&1 || die "readelf isn't installed" -command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" +command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed" +command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed" usage() { echo "usage: faddr2line ..." >&2 @@ -69,10 +76,10 @@ die() { find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return - local file_line=$(addr2line -e $objfile $start_kernel_addr) + local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) [[ -z $file_line ]] && return local prefix=${file_line%init/main.c:*} @@ -104,7 +111,7 @@ __faddr2line() { # Go through each of the object's symbols which match the func name. # In rare cases there might be duplicates. - file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}') + file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}') while read symbol; do local fields=($symbol) local sym_base=0x${fields[0]} @@ -156,10 +163,10 @@ __faddr2line() { # pass real address to addr2line echo "$func+$offset/$sym_size:" - addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" + ${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 - done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') + done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') } [[ $# -lt 2 ]] && usage -- cgit v1.2.3 From 31383c6865a578834dd953d9dbc88e6b19fe3997 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:28 -0800 Subject: mm, hugetlbfs: introduce ->split() to vm_operations_struct Patch series "device-dax: fix unaligned munmap handling" When device-dax is operating in huge-page mode we want it to behave like hugetlbfs and fail attempts to split vmas into unaligned ranges. It would be messy to teach the munmap path about device-dax alignment constraints in the same (hstate) way that hugetlbfs communicates this constraint. Instead, these patches introduce a new ->split() vm operation. This patch (of 2): The device-dax interface has similar constraints as hugetlbfs in that it requires the munmap path to unmap in huge page aligned units. Rather than add more custom vma handling code in __split_vma() introduce a new vm operation to perform this vma specific check. Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Cc: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/hugetlb.c | 8 ++++++++ mm/mmap.c | 8 +++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ee073146aaa7..b3b6a7e313e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -377,6 +377,7 @@ enum page_entry_size { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); + int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_fault *vmf); int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 681b300185c0..698e8fb34031 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3125,6 +3125,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) } } +static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) +{ + if (addr & ~(huge_page_mask(hstate_vma(vma)))) + return -EINVAL; + return 0; +} + /* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the @@ -3141,6 +3148,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, .close = hugetlb_vm_op_close, + .split = hugetlb_vm_op_split, }; static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, diff --git a/mm/mmap.c b/mm/mmap.c index 924839fac0e6..a4d546821214 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2555,9 +2555,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *new; int err; - if (is_vm_hugetlb_page(vma) && (addr & - ~(huge_page_mask(hstate_vma(vma))))) - return -EINVAL; + if (vma->vm_ops && vma->vm_ops->split) { + err = vma->vm_ops->split(vma, addr); + if (err) + return err; + } new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) -- cgit v1.2.3 From 9702cffdbf2129516db679e4467db81e1cd287da Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:32 -0800 Subject: device-dax: implement ->split() to catch invalid munmap attempts Similar to how device-dax enforces that the 'address', 'offset', and 'len' parameters to mmap() be aligned to the device's fundamental alignment, the same constraints apply to munmap(). Implement ->split() to fail munmap calls that violate the alignment constraint. Otherwise, we later fail VM_BUG_ON checks in the unmap_page_range() path with crash signatures of the form: vma ffff8800b60c8a88 start 00007f88c0000000 end 00007f88c0e00000 next (null) prev (null) mm ffff8800b61150c0 prot 8000000000000027 anon_vma (null) vm_ops ffffffffa0091240 pgoff 0 file ffff8800b638ef80 private_data (null) flags: 0x380000fb(read|write|shared|mayread|maywrite|mayexec|mayshare|softdirty|mixedmap|hugepage) ------------[ cut here ]------------ kernel BUG at mm/huge_memory.c:2014! [..] RIP: 0010:__split_huge_pud+0x12a/0x180 [..] Call Trace: unmap_page_range+0x245/0xa40 ? __vma_adjust+0x301/0x990 unmap_vmas+0x4c/0xa0 unmap_region+0xae/0x120 ? __vma_rb_erase+0x11a/0x230 do_munmap+0x276/0x410 vm_munmap+0x6a/0xa0 SyS_munmap+0x1d/0x30 Link: http://lkml.kernel.org/r/151130418681.4029.7118245855057952010.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Reported-by: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dax/device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 6833ada237ab..7b0bf825c4e7 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -428,9 +428,21 @@ static int dev_dax_fault(struct vm_fault *vmf) return dev_dax_huge_fault(vmf, PE_SIZE_PTE); } +static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr) +{ + struct file *filp = vma->vm_file; + struct dev_dax *dev_dax = filp->private_data; + struct dax_region *dax_region = dev_dax->region; + + if (!IS_ALIGNED(addr, dax_region->align)) + return -EINVAL; + return 0; +} + static const struct vm_operations_struct dax_vm_ops = { .fault = dev_dax_fault, .huge_fault = dev_dax_huge_fault, + .split = dev_dax_split, }; static int dax_mmap(struct file *filp, struct vm_area_struct *vma) -- cgit v1.2.3 From 2bb6d2837083de722bfdc369cb0d76ce188dd9b4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:35 -0800 Subject: mm: introduce get_user_pages_longterm Patch series "introduce get_user_pages_longterm()", v2. Here is a new get_user_pages api for cases where a driver intends to keep an elevated page count indefinitely. This is distinct from usages like iov_iter_get_pages where the elevated page counts are transient. The iov_iter_get_pages cases immediately turn around and submit the pages to a device driver which will put_page when the i/o operation completes (under kernel control). In the longterm case userspace is responsible for dropping the page reference at some undefined point in the future. This is untenable for filesystem-dax case where the filesystem is in control of the lifetime of the block / page and needs reasonable limits on how long it can wait for pages in a mapping to become idle. Fixing filesystems to actually wait for dax pages to be idle before blocks from a truncate/hole-punch operation are repurposed is saved for a later patch series. Also, allowing longterm registration of dax mappings is a future patch series that introduces a "map with lease" semantic where the kernel can revoke a lease and force userspace to drop its page references. I have also tagged these for -stable to purposely break cases that might assume that longterm memory registrations for filesystem-dax mappings were supported by the kernel. The behavior regression this policy change implies is one of the reasons we maintain the "dax enabled. Warning: EXPERIMENTAL, use at your own risk" notification when mounting a filesystem in dax mode. It is worth noting the device-dax interface does not suffer the same constraints since it does not support file space management operations like hole-punch. This patch (of 4): Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow long standing memory registrations against filesytem-dax vmas. Device-dax vmas do not have this problem and are explicitly allowed. This is temporary until a "memory registration with layout-lease" mechanism can be implemented for the affected sub-systems (RDMA and V4L2). [akpm@linux-foundation.org: use kcalloc()] Link: http://lkml.kernel.org/r/151068939435.7446.13560129395419350737.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Suggested-by: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Inki Dae Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Ross Zwisler Cc: Sean Hefty Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 14 ++++++++++++ include/linux/mm.h | 13 +++++++++++ mm/gup.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index bbd92da0946e..9dc498d16cc1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3194,6 +3194,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma) return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } +static inline bool vma_is_fsdax(struct vm_area_struct *vma) +{ + struct inode *inode; + + if (!vma->vm_file) + return false; + if (!vma_is_dax(vma)) + return false; + inode = file_inode(vma->vm_file); + if (inode->i_mode == S_IFCHR) + return false; /* device-dax */ + return true; +} + static inline int iocb_flags(struct file *file) { int res = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index b3b6a7e313e9..ea818ff739cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1380,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); +#ifdef CONFIG_FS_DAX +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas); +#else +static inline long get_user_pages_longterm(unsigned long start, + unsigned long nr_pages, unsigned int gup_flags, + struct page **pages, struct vm_area_struct **vmas) +{ + return get_user_pages(start, nr_pages, gup_flags, pages, vmas); +} +#endif /* CONFIG_FS_DAX */ + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); diff --git a/mm/gup.c b/mm/gup.c index 85cc822fd403..d3fb60e5bfac 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, } EXPORT_SYMBOL(get_user_pages); +#ifdef CONFIG_FS_DAX +/* + * This is the same as get_user_pages() in that it assumes we are + * operating on the current task's mm, but it goes further to validate + * that the vmas associated with the address range are suitable for + * longterm elevated page reference counts. For example, filesystem-dax + * mappings are subject to the lifetime enforced by the filesystem and + * we need guarantees that longterm users like RDMA and V4L2 only + * establish mappings that have a kernel enforced revocation mechanism. + * + * "longterm" == userspace controlled elevated page count lifetime. + * Contrast this to iov_iter_get_pages() usages which are transient. + */ +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas_arg) +{ + struct vm_area_struct **vmas = vmas_arg; + struct vm_area_struct *vma_prev = NULL; + long rc, i; + + if (!pages) + return -EINVAL; + + if (!vmas) { + vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!vmas) + return -ENOMEM; + } + + rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas); + + for (i = 0; i < rc; i++) { + struct vm_area_struct *vma = vmas[i]; + + if (vma == vma_prev) + continue; + + vma_prev = vma; + + if (vma_is_fsdax(vma)) + break; + } + + /* + * Either get_user_pages() failed, or the vma validation + * succeeded, in either case we don't need to put_page() before + * returning. + */ + if (i >= rc) + goto out; + + for (i = 0; i < rc; i++) + put_page(pages[i]); + rc = -EOPNOTSUPP; +out: + if (vmas != vmas_arg) + kfree(vmas); + return rc; +} +EXPORT_SYMBOL(get_user_pages_longterm); +#endif /* CONFIG_FS_DAX */ + /** * populate_vma_page_range() - populate a range of pages in the vma. * @vma: target vma -- cgit v1.2.3 From b7f0554a56f21fb3e636a627450a9add030889be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:39 -0800 Subject: mm: fail get_vaddr_frames() for filesystem-dax mappings Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow V4L2, Exynos, and other frame vector users to create long standing / irrevocable memory registrations against filesytem-dax vmas. [dan.j.williams@intel.com: add comment for vma_is_fsdax() check in get_vaddr_frames(), per Jan] Link: http://lkml.kernel.org/r/151197874035.26211.4061781453123083667.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151068939985.7446.15684639617389154187.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reviewed-by: Jan Kara Cc: Inki Dae Cc: Seung-Woo Kim Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Vlastimil Babka Cc: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Ross Zwisler Cc: Sean Hefty Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/frame_vector.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 2f98df0d460e..297c7238f7d4 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -53,6 +53,18 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = -EFAULT; goto out; } + + /* + * While get_vaddr_frames() could be used for transient (kernel + * controlled lifetime) pinning of memory pages all current + * users establish long term (userspace controlled lifetime) + * page pinning. Treat get_vaddr_frames() like + * get_user_pages_longterm() and disallow it for filesystem-dax + * mappings. + */ + if (vma_is_fsdax(vma)) + return -EOPNOTSUPP; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) { vec->got_ref = true; vec->is_pfns = false; -- cgit v1.2.3 From b70131de648c2b997d22f4653934438013f407a1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:43 -0800 Subject: v4l2: disable filesystem-dax mapping support V4L2 memory registrations are incompatible with filesystem-dax that needs the ability to revoke dma access to a mapping at will, or otherwise allow the kernel to wait for completion of DMA. The filesystem-dax implementation breaks the traditional solution of truncate of active file backed mappings since there is no page-cache page we can orphan to sustain ongoing DMA. If v4l2 wants to support long lived DMA mappings it needs to arrange to hold a file lease or use some other mechanism so that the kernel can coordinate revoking DMA access when the filesystem needs to truncate mappings. Link: http://lkml.kernel.org/r/151068940499.7446.12846708245365671207.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reported-by: Jan Kara Reviewed-by: Jan Kara Cc: Mauro Carvalho Chehab Cc: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Inki Dae Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mel Gorman Cc: Ross Zwisler Cc: Sean Hefty Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/v4l2-core/videobuf-dma-sg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 0b5c43f7e020..f412429cf5ba 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -185,12 +185,13 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n", data, size, dma->nr_pages); - err = get_user_pages(data & PAGE_MASK, dma->nr_pages, + err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages, flags, dma->pages, NULL); if (err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; - dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages); + dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err, + dma->nr_pages); return err < 0 ? err : -EINVAL; } return 0; -- cgit v1.2.3 From 5f1d43de54164dcfb9bfa542fcc92c1e1a1b6c1d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:47 -0800 Subject: IB/core: disable memory registration of filesystem-dax vmas Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow RDMA to create long standing memory registrations against filesytem-dax vmas. Link: http://lkml.kernel.org/r/151068941011.7446.7766030590347262502.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reported-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Jason Gunthorpe Acked-by: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Jeff Moyer Cc: Ross Zwisler Cc: Inki Dae Cc: Jan Kara Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 21e60b1e2ff4..130606c3b07c 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -191,7 +191,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, sg_list_start = umem->sg_head.sgl; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); -- cgit v1.2.3 From 04e35f4495dd560db30c25efca4eecae8ec8c375 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 29 Nov 2017 16:10:51 -0800 Subject: exec: avoid RLIMIT_STACK races with prlimit() While the defense-in-depth RLIMIT_STACK limit on setuid processes was protected against races from other threads calling setrlimit(), I missed protecting it against races from external processes calling prlimit(). This adds locking around the change and makes sure that rlim_max is set too. Link: http://lkml.kernel.org/r/20171127193457.GA11348@beast Fixes: 64701dee4178e ("exec: Use sane stack rlimit under secureexec") Signed-off-by: Kees Cook Reported-by: Ben Hutchings Reported-by: Brad Spengler Acked-by: Serge Hallyn Cc: James Morris Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: Jiri Slaby Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 1d6243d9f2b6..6be2aa0ab26f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm) * avoid bad behavior from the prior rlimits. This has to * happen before arch_pick_mmap_layout(), which examines * RLIMIT_STACK, but after the point of no return to avoid - * needing to clean up the change on failure. + * races from other threads changing the limits. This also + * must be protected from races with prlimit() calls. */ + task_lock(current->group_leader); if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; + if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM) + current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM; + task_unlock(current->group_leader); } arch_pick_mmap_layout(current->mm); -- cgit v1.2.3 From 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 Mon Sep 17 00:00:00 2001 From: chenjie Date: Wed, 29 Nov 2017 16:10:54 -0800 Subject: mm/madvise.c: fix madvise() infinite loop under special circumstances MADVISE_WILLNEED has always been a noop for DAX (formerly XIP) mappings. Unfortunately madvise_willneed() doesn't communicate this information properly to the generic madvise syscall implementation. The calling convention is quite subtle there. madvise_vma() is supposed to either return an error or update &prev otherwise the main loop will never advance to the next vma and it will keep looping for ever without a way to get out of the kernel. It seems this has been broken since introduction. Nobody has noticed because nobody seems to be using MADVISE_WILLNEED on these DAX mappings. [mhocko@suse.com: rewrite changelog] Link: http://lkml.kernel.org/r/20171127115318.911-1-guoxuenan@huawei.com Fixes: fe77ba6f4f97 ("[PATCH] xip: madvice/fadvice: execute in place") Signed-off-by: chenjie Signed-off-by: guoxuenan Acked-by: Michal Hocko Cc: Minchan Kim Cc: zhangyi (F) Cc: Miao Xie Cc: Mike Rapoport Cc: Shaohua Li Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: David Rientjes Cc: Anshuman Khandual Cc: Rik van Riel Cc: Carsten Otte Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 375cf32087e4..751e97aa2210 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_area_struct *vma, { struct file *file = vma->vm_file; + *prev = vma; #ifdef CONFIG_SWAP if (!file) { - *prev = vma; force_swapin_readahead(vma, start, end); return 0; } if (shmem_mapping(file->f_mapping)) { - *prev = vma; force_shm_swapin_readahead(vma, start, end, file->f_mapping); return 0; @@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } - *prev = vma; start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; -- cgit v1.2.3 From 90daf3062fc0f8f919d5496fe167bbd6016a6a63 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 29 Nov 2017 16:10:58 -0800 Subject: Revert "mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical" This reverts commit 0f6d24f87856 ("mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical") because it causes false positive warnings during OOM situations as noticed by Tetsuo Handa: Node 0 active_anon:3525940kB inactive_anon:8372kB active_file:216kB inactive_file:1872kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:2504kB dirty:52kB writeback:0kB shmem:8660kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 636928kB writeback_tmp:0kB unstable:0kB all_unreclaimable? yes Node 0 DMA free:14848kB min:284kB low:352kB high:420kB active_anon:992kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15988kB managed:15904kB mlocked:0kB kernel_stack:0kB pagetables:24kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 2687 3645 3645 Node 0 DMA32 free:53004kB min:49608kB low:62008kB high:74408kB active_anon:2712648kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:3129216kB managed:2773132kB mlocked:0kB kernel_stack:96kB pagetables:5096kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 0 958 958 Node 0 Normal free:17140kB min:17684kB low:22104kB high:26524kB active_anon:812300kB inactive_anon:8372kB active_file:1228kB inactive_file:1868kB unevictable:0kB writepending:52kB present:1048576kB managed:981224kB mlocked:0kB kernel_stack:3520kB pagetables:8552kB bounce:0kB free_pcp:120kB local_pcp:120kB free_cma:0kB lowmem_reserve[]: 0 0 0 0 [...] Out of memory: Kill process 8459 (a.out) score 999 or sacrifice child Killed process 8459 (a.out) total-vm:4180kB, anon-rss:88kB, file-rss:0kB, shmem-rss:0kB oom_reaper: reaped process 8459 (a.out), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB vm direct limit must be set greater than background limit. The problem is that both thresh and bg_thresh will be 0 if available_memory is less than 4 pages when evaluating global_dirtyable_memory. While this might be worked around the whole point of the warning is dubious at best. We do rely on admins to do sensible things when changing tunable knobs. Dirty memory writeback knobs are not any special in that regards so revert the warning rather than adding more hacks to work this around. Debugged by Yafang Shao. Link: http://lkml.kernel.org/r/20171127091939.tahb77nznytcxw55@dhcp22.suse.cz Fixes: 0f6d24f87856 ("mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical") Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Cc: Yafang Shao Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 7 ------- mm/page-writeback.c | 5 +---- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b920423f88cb..5025ff9307e6 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -158,10 +158,6 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any value lower than this limit will be ignored and the old configuration will be retained. -Note: the value of dirty_bytes also must be set greater than -dirty_background_bytes or the amount of memory corresponding to -dirty_background_ratio. - ============================================================== dirty_expire_centisecs @@ -181,9 +177,6 @@ generating disk writes will itself start writing out dirty data. The total available memory is not equal to total system memory. -Note: dirty_ratio must be set greater than dirty_background_ratio or -ratio corresponding to dirty_background_bytes. - ============================================================== dirty_writeback_centisecs diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e7095030aa1f..586f31261c83 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -433,11 +433,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) else bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; - if (unlikely(bg_thresh >= thresh)) { - pr_warn("vm direct limit must be set greater than background limit.\n"); + if (bg_thresh >= thresh) bg_thresh = thresh / 2; - } - tsk = current; if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; -- cgit v1.2.3 From d5dabd633922ac5ee5bcc67748f7defb8b211469 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 29 Nov 2017 16:11:01 -0800 Subject: fs/mbcache.c: make count_objects() more robust When running ltp stress test for 7*24 hours, vmscan occasionally emits the following warning continuously: mb_cache_scan+0x0/0x3f0 negative objects to delete nr=-9232265467809300450 ... Tracing shows the freeable(mb_cache_count returns) is -1, which causes the continuous accumulation and overflow of total_scan. This patch makes sure that mb_cache_count() cannot return a negative value, which makes the mbcache shrinker more robust. Link: http://lkml.kernel.org/r/1511753419-52328-1-git-send-email-jiang.biao2@zte.com.cn Signed-off-by: Jiang Biao Cc: Al Viro Cc: Minchan Kim Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/mbcache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/mbcache.c b/fs/mbcache.c index d818fd236787..b8b8b9ced9f8 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink, struct mb_cache *cache = container_of(shrink, struct mb_cache, c_shrink); + /* Unlikely, but not impossible */ + if (unlikely(cache->c_entry_count < 0)) + return 0; return cache->c_entry_count; } -- cgit v1.2.3 From edbddb83a15b4361d8c3bf00aabee85fd3ef4d80 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Nov 2017 16:11:05 -0800 Subject: scripts/bloat-o-meter: don't fail with division by 0 Under some circumstances it's possible to get a divider 0 which crashes the script. Traceback (most recent call last): File "linux/scripts/bloat-o-meter", line 98, in print_result("Function", "tTdDbBrR", 2) File "linux/scripts/bloat-o-meter", line 87, in print_result (otot, ntot, (ntot - otot)*100.0/otot)) ZeroDivisionError: float division by zero Hide this by checking the divider first. Link: http://lkml.kernel.org/r/20171123171219.31453-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Alexey Dobriyan Cc: Vaneet Narang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/bloat-o-meter | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 6f099f915dcf..94b664817ad9 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -83,8 +83,11 @@ def print_result(symboltype, symbolformat, argc): for d, n in delta: if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) - print("Total: Before=%d, After=%d, chg %+.2f%%" % \ - (otot, ntot, (ntot - otot)*100.0/otot)) + if otot: + percent = (ntot - otot) * 100.0 / otot + else: + percent = 0 + print("Total: Before=%d, After=%d, chg %+.2f%%" % (otot, ntot, percent)) if sys.argv[1] == "-c": print_result("Function", "tT", 3) -- cgit v1.2.3 From bde5f6bc68db51128f875a756e9082a6c6ff7b4c Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 29 Nov 2017 16:11:08 -0800 Subject: kmemleak: add scheduling point to kmemleak_scan() kmemleak_scan() will scan struct page for each node and it can be really large and resulting in a soft lockup. We have seen a soft lockup when do scan while compile kernel: watchdog: BUG: soft lockup - CPU#53 stuck for 22s! [bash:10287] [...] Call Trace: kmemleak_scan+0x21a/0x4c0 kmemleak_write+0x312/0x350 full_proxy_write+0x5a/0xa0 __vfs_write+0x33/0x150 vfs_write+0xad/0x1a0 SyS_write+0x52/0xc0 do_syscall_64+0x61/0x1a0 entry_SYSCALL64_slow_path+0x25/0x25 Fix this by adding cond_resched every MAX_SCAN_SIZE. Link: http://lkml.kernel.org/r/1511439788-20099-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Suggested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e4738d5e9b8c..3d4781756d50 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1523,6 +1523,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); -- cgit v1.2.3 From 40a899ed16486455f964e46d1af31fd4fded21c1 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 29 Nov 2017 16:11:12 -0800 Subject: mm: migrate: fix an incorrect call of prep_transhuge_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In https://lkml.org/lkml/2017/11/20/411, Andrea reported that during memory hotplug/hot remove prep_transhuge_page() is called incorrectly on non-THP pages for migration, when THP is on but THP migration is not enabled. This leads to a bad state of target pages for migration. By inspecting the code, if called on a non-THP, prep_transhuge_page() will 1) change the value of the mapping of (page + 2), since it is used for THP deferred list; 2) change the lru value of (page + 1), since it is used for THP's dtor. Both can lead to data corruption of these two pages. Andrea said: "Pragmatically and from the point of view of the memory_hotplug subsys, the effect is a kernel crash when pages are being migrated during a memory hot remove offline and migration target pages are found in a bad state" This patch fixes it by only calling prep_transhuge_page() when we are certain that the target page is THP. Link: http://lkml.kernel.org/r/20171121021855.50525-1-zi.yan@sent.com Fixes: 8135d8926c08 ("mm: memory_hotplug: memory hotremove supports thp migration") Signed-off-by: Zi Yan Reported-by: Andrea Reale Cc: Naoya Horiguchi Cc: Michal Hocko Cc: "Jérôme Glisse" Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 895ec0c4942e..a2246cf670ba 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page, new_page = __alloc_pages_nodemask(gfp_mask, order, preferred_nid, nodemask); - if (new_page && PageTransHuge(page)) + if (new_page && PageTransHuge(new_page)) prep_transhuge_page(new_page); return new_page; -- cgit v1.2.3 From d08afa149acfd00871484ada6dabc3880524cd1c Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 29 Nov 2017 16:11:15 -0800 Subject: mm, memcg: fix mem_cgroup_swapout() for THPs Commit d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP") changed mem_cgroup_swapout() to support transparent huge page (THP). However the patch missed one location which should be changed for correctly handling THPs. The resulting bug will cause the memory cgroups whose THPs were swapped out to become zombies on deletion. Link: http://lkml.kernel.org/r/20171128161941.20931-1-shakeelb@google.com Fixes: d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP") Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Huang Ying Cc: Vladimir Davydov Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 50e6906314f8..ac2ffd5e02b9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) memcg_check_events(memcg, page); if (!mem_cgroup_is_root(memcg)) - css_put(&memcg->css); + css_put_many(&memcg->css, nr_entries); } /** -- cgit v1.2.3 From b6e8e12c0aeb5fbf1bf46c84d58cc93aedede385 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Wed, 29 Nov 2017 16:11:19 -0800 Subject: fs/fat/inode.c: fix sb_rdonly() change Commit bc98a42c1f7d ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)") converted fat_remount():new_rdonly from a bool to an int. However fat_remount() depends upon the compiler's conversion of a non-zero integer into boolean `true'. Fix it by switching `new_rdonly' back into a bool. Link: http://lkml.kernel.org/r/87mv3d5x51.fsf@mail.parknet.co.jp Fixes: bc98a42c1f7d0f8 ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)") Signed-off-by: OGAWA Hirofumi Cc: Joe Perches Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 016c46b5e44c..20a0a89eaca5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -779,7 +779,7 @@ static void __exit fat_destroy_inodecache(void) static int fat_remount(struct super_block *sb, int *flags, char *data) { - int new_rdonly; + bool new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME); -- cgit v1.2.3 From 43694d4bf843ddd34519e8e9de983deefeada699 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 29 Nov 2017 16:11:23 -0800 Subject: autofs: revert "autofs: take more care to not update last_used on path walk" While commit 092a53452bb7 ("autofs: take more care to not update last_used on path walk") helped (partially) resolve a problem where automounts were not expiring due to aggressive accesses from user space it has a side effect for very large environments. This change helps with the expire problem by making the expire more aggressive but, for very large environments, that means more mount requests from clients. When there are a lot of clients that can mean fairly significant server load increases. It turns out I put the last_used in this position to solve this very problem and failed to update my own thinking of the autofs expire policy. So the patch being reverted introduces a regression which should be fixed. Link: http://lkml.kernel.org/r/151174729420.6162.1832622523537052460.stgit@pluto.themaw.net Fixes: 092a53452b ("autofs: take more care to not update last_used on path walk") Signed-off-by: Ian Kent Reviewed-by: NeilBrown Cc: Al Viro Cc: [4.11+] Cc: Colin Walters Cc: David Howells Cc: Ondrej Holy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d79ced925861..82e8f6edfb48 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk) pr_debug("waiting for mount name=%pd\n", path->dentry); status = autofs4_wait(sbi, path, NFY_MOUNT); pr_debug("mount wait done status=%d\n", status); - ino->last_used = jiffies; } + ino->last_used = jiffies; return status; } @@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) */ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { struct dentry *parent = dentry->d_parent; + struct autofs_info *ino; struct dentry *new; new = d_lookup(parent, &dentry->d_name); if (!new) return NULL; - if (new == dentry) - dput(new); - else { - struct autofs_info *ino; - - ino = autofs4_dentry_ino(new); - ino->last_used = jiffies; - dput(path->dentry); - path->dentry = new; - } + ino = autofs4_dentry_ino(new); + ino->last_used = jiffies; + dput(path->dentry); + path->dentry = new; } return path->dentry; } -- cgit v1.2.3 From 5d38f049cee1e1c4a7ac55aa79d37d01ddcc3860 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 29 Nov 2017 16:11:26 -0800 Subject: autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored" Commit 42f461482178 ("autofs: fix AT_NO_AUTOMOUNT not being honored") allowed the fstatat(2) system call to properly honor the AT_NO_AUTOMOUNT flag but introduced a semantic change. In order to honor AT_NO_AUTOMOUNT a semantic change was made to the negative dentry case for stat family system calls in follow_automount(). This changed the unconditional triggering of an automount in this case to no longer be done and an error returned instead. This has caused more problems than I expected so reverting the change is needed. In a discussion with Neil Brown it was concluded that the automount(8) daemon can implement this change without kernel modifications. So that will be done instead and the autofs module documentation updated with a description of the problem and what needs to be done by module users for this specific case. Link: http://lkml.kernel.org/r/151174730120.6162.3848002191530283984.stgit@pluto.themaw.net Fixes: 42f4614821 ("autofs: fix AT_NO_AUTOMOUNT not being honored") Signed-off-by: Ian Kent Cc: Neil Brown Cc: Al Viro Cc: David Howells Cc: Colin Walters Cc: Ondrej Holy Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 15 +++------------ include/linux/fs.h | 3 ++- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f0c7a7b9b6ca..9cc91fb7f156 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd, * of the daemon to instantiate them before they can be used. */ if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE | - LOOKUP_AUTOMOUNT))) { - /* Positive dentry that isn't meant to trigger an - * automount, EISDIR will allow it to be used, - * otherwise there's no mount here "now" so return - * ENOENT. - */ - if (path->dentry->d_inode) - return -EISDIR; - else - return -ENOENT; - } + LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && + path->dentry->d_inode) + return -EISDIR; if (path->dentry->d_sb->s_user_ns != &init_user_ns) return -EACCES; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dc498d16cc1..511fbaabf624 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat) static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { -- cgit v1.2.3 From f4f0a3d85b50a65a348e2b8635041d6b30f01deb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 29 Nov 2017 16:11:30 -0800 Subject: mm/hugetlb: fix NULL-pointer dereference on 5-level paging machine I made a mistake during converting hugetlb code to 5-level paging: in huge_pte_alloc() we have to use p4d_alloc(), not p4d_offset(). Otherwise it leads to crash -- NULL-pointer dereference in pud_alloc() if p4d table is not yet allocated. It only can happen in 5-level paging mode. In 4-level paging mode p4d_offset() always returns pgd, so we are fine. Link: http://lkml.kernel.org/r/20171122121921.64822-1-kirill.shutemov@linux.intel.com Fixes: c2febafc6773 ("mm: convert generic code to 5-level paging") Signed-off-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 698e8fb34031..9a334f5fb730 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4635,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - p4d = p4d_offset(pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; pud = pud_alloc(mm, p4d, addr); if (pud) { if (sz == PUD_SIZE) { -- cgit v1.2.3 From 72639e6df4128dfde8fee7638a35be7db8114000 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 29 Nov 2017 16:11:33 -0800 Subject: fs/hugetlbfs/inode.c: change put_page/unlock_page order in hugetlbfs_fallocate() hugetlfs_fallocate() currently performs put_page() before unlock_page(). This scenario opens a small time window, from the time the page is added to the page cache, until it is unlocked, in which the page might be removed from the page-cache by another core. If the page is removed during this time windows, it might cause a memory corruption, as the wrong page will be unlocked. It is arguable whether this scenario can happen in a real system, and there are several mitigating factors. The issue was found by code inspection (actually grep), and not by actually triggering the flow. Yet, since putting the page before unlocking is incorrect it should be fixed, if only to prevent future breakage or someone copy-pasting this code. Mike said: "I am of the opinion that this does not need to be sent to stable. Although the ordering is current code is incorrect, there is no way for this to be a problem with current locking. In addition, I verified that the perhaps bigger issue with sys_fadvise64(POSIX_FADV_DONTNEED) for hugetlbfs and other filesystems is addressed in 3a77d214807c ("mm: fadvise: avoid fadvise for fs without backing device")" Link: http://lkml.kernel.org/r/20170826191124.51642-1-namit@vmware.com Fixes: 70c3547e36f5c ("hugetlbfs: add hugetlbfs_fallocate()") Signed-off-by: Nadav Amit Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Cc: Eric Biggers Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1e76730aac0d..8a85f3f53446 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, mutex_unlock(&hugetlb_fault_mutex_table[hash]); /* - * page_put due to reference from alloc_huge_page() * unlock_page because locked by add_to_page_cache() + * page_put due to reference from alloc_huge_page() */ - put_page(page); unlock_page(page); + put_page(page); } if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) -- cgit v1.2.3 From 1b6fba458c0a2e8513071330972c4c587b7d28cc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 21 Nov 2017 09:17:43 +0100 Subject: drm/bridge: adv7511/33: Fix adv7511_cec_init() failure handling If the device tree for a board did not specify a cec clock, then adv7511_cec_init would return an error, which would cause adv7511_probe() to fail and thus there is no HDMI output. There is no need to have adv7511_probe() fail if the CEC initialization fails, so just change adv7511_cec_init() to a void function. In addition, adv7511_cec_init() should just return silently if the cec clock isn't found and show a message for any other errors. An otherwise correct cleanup patch from Dan Carpenter turned this broken failure handling into a kernel Oops, so bisection points to commit 7af35b0addbc ("drm/kirin: Checking for IS_ERR() instead of NULL") rather than 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support"). Based on earlier patches from Arnd and John. Reported-by: Naresh Kamboju Cc: Xinliang Liu Cc: Dan Carpenter Cc: Sean Paul Cc: Archit Taneja Cc: John Stultz Link: https://bugs.linaro.org/show_bug.cgi?id=3345 Link: https://lkft.validation.linaro.org/scheduler/job/48017#L3551 Fixes: 7af35b0addbc ("drm/kirin: Checking for IS_ERR() instead of NULL") Fixes: 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support") Signed-off-by: Hans Verkuil Tested-by: Hans Verkuil Reviewed-by: Laurent Pinchart Tested-by: John Stultz Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/9097b2a4-b6b9-5fca-e039-0a17694b1143@xs4all.nl --- drivers/gpu/drm/bridge/adv7511/adv7511.h | 13 +++++++++-- drivers/gpu/drm/bridge/adv7511/adv7511_cec.c | 32 +++++++++++++++++++--------- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 17 ++++----------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index b4efcbabf7f7..d034b2cb5eee 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -372,9 +372,18 @@ struct adv7511 { }; #ifdef CONFIG_DRM_I2C_ADV7511_CEC -int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, - unsigned int offset); +int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511); void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1); +#else +static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) +{ + unsigned int offset = adv7511->type == ADV7533 ? + ADV7533_REG_CEC_OFFSET : 0; + + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + ADV7511_CEC_CTRL_POWER_DOWN); + return 0; +} #endif #ifdef CONFIG_DRM_I2C_ADV7533 diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index b33d730e4d73..a20a45c0b353 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -300,18 +300,21 @@ static int adv7511_cec_parse_dt(struct device *dev, struct adv7511 *adv7511) return 0; } -int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, - unsigned int offset) +int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) { + unsigned int offset = adv7511->type == ADV7533 ? + ADV7533_REG_CEC_OFFSET : 0; int ret = adv7511_cec_parse_dt(dev, adv7511); if (ret) - return ret; + goto err_cec_parse_dt; adv7511->cec_adap = cec_allocate_adapter(&adv7511_cec_adap_ops, adv7511, dev_name(dev), CEC_CAP_DEFAULTS, ADV7511_MAX_ADDRS); - if (IS_ERR(adv7511->cec_adap)) - return PTR_ERR(adv7511->cec_adap); + if (IS_ERR(adv7511->cec_adap)) { + ret = PTR_ERR(adv7511->cec_adap); + goto err_cec_alloc; + } regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, 0); /* cec soft reset */ @@ -329,9 +332,18 @@ int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, ((adv7511->cec_clk_freq / 750000) - 1) << 2); ret = cec_register_adapter(adv7511->cec_adap, dev); - if (ret) { - cec_delete_adapter(adv7511->cec_adap); - adv7511->cec_adap = NULL; - } - return ret; + if (ret) + goto err_cec_register; + return 0; + +err_cec_register: + cec_delete_adapter(adv7511->cec_adap); + adv7511->cec_adap = NULL; +err_cec_alloc: + dev_info(dev, "Initializing CEC failed with error %d, disabling CEC\n", + ret); +err_cec_parse_dt: + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + ADV7511_CEC_CTRL_POWER_DOWN); + return ret == -EPROBE_DEFER ? ret : 0; } diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 0e14f1572d05..efa29db5fc2b 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1084,7 +1084,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) struct device *dev = &i2c->dev; unsigned int main_i2c_addr = i2c->addr << 1; unsigned int edid_i2c_addr = main_i2c_addr + 4; - unsigned int offset; unsigned int val; int ret; @@ -1192,24 +1191,16 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) if (adv7511->type == ADV7511) adv7511_set_link_config(adv7511, &link_config); + ret = adv7511_cec_init(dev, adv7511); + if (ret) + goto err_unregister_cec; + adv7511->bridge.funcs = &adv7511_bridge_funcs; adv7511->bridge.of_node = dev->of_node; drm_bridge_add(&adv7511->bridge); adv7511_audio_init(dev, adv7511); - - offset = adv7511->type == ADV7533 ? ADV7533_REG_CEC_OFFSET : 0; - -#ifdef CONFIG_DRM_I2C_ADV7511_CEC - ret = adv7511_cec_init(dev, adv7511, offset); - if (ret) - goto err_unregister_cec; -#else - regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, - ADV7511_CEC_CTRL_POWER_DOWN); -#endif - return 0; err_unregister_cec: -- cgit v1.2.3 From ebe32c3e282a62974b190b9d514864fc0d56716e Mon Sep 17 00:00:00 2001 From: Pierre-Hugues Husson Date: Sat, 25 Nov 2017 21:18:44 +0100 Subject: drm/bridge: synopsys/dw-hdmi: Enable cec clock Support the "cec" optional clock. The documentation already mentions "cec" optional clock and it is used by several boards, but currently the driver doesn't enable it, thus preventing cec from working on those boards. And even worse: a /dev/cecX device will appear for those boards, but it won't be functioning without configuring this clock. Changes: v4: - Change commit message to stress the importance of this patch v3: - Drop useless braces v2: - Separate ENOENT errors from others - Propagate other errors (especially -EPROBE_DEFER) Signed-off-by: Pierre-Hugues Husson Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171125201844.11353-1-phh@phh.me --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index bf14214fa464..b72259bf6e2f 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -138,6 +138,7 @@ struct dw_hdmi { struct device *dev; struct clk *isfr_clk; struct clk *iahb_clk; + struct clk *cec_clk; struct dw_hdmi_i2c *i2c; struct hdmi_data_info hdmi_data; @@ -2382,6 +2383,26 @@ __dw_hdmi_probe(struct platform_device *pdev, goto err_isfr; } + hdmi->cec_clk = devm_clk_get(hdmi->dev, "cec"); + if (PTR_ERR(hdmi->cec_clk) == -ENOENT) { + hdmi->cec_clk = NULL; + } else if (IS_ERR(hdmi->cec_clk)) { + ret = PTR_ERR(hdmi->cec_clk); + if (ret != -EPROBE_DEFER) + dev_err(hdmi->dev, "Cannot get HDMI cec clock: %d\n", + ret); + + hdmi->cec_clk = NULL; + goto err_iahb; + } else { + ret = clk_prepare_enable(hdmi->cec_clk); + if (ret) { + dev_err(hdmi->dev, "Cannot enable HDMI cec clock: %d\n", + ret); + goto err_iahb; + } + } + /* Product and revision IDs */ hdmi->version = (hdmi_readb(hdmi, HDMI_DESIGN_ID) << 8) | (hdmi_readb(hdmi, HDMI_REVISION_ID) << 0); @@ -2518,6 +2539,8 @@ err_iahb: cec_notifier_put(hdmi->cec_notifier); clk_disable_unprepare(hdmi->iahb_clk); + if (hdmi->cec_clk) + clk_disable_unprepare(hdmi->cec_clk); err_isfr: clk_disable_unprepare(hdmi->isfr_clk); err_res: @@ -2541,6 +2564,8 @@ static void __dw_hdmi_remove(struct dw_hdmi *hdmi) clk_disable_unprepare(hdmi->iahb_clk); clk_disable_unprepare(hdmi->isfr_clk); + if (hdmi->cec_clk) + clk_disable_unprepare(hdmi->cec_clk); if (hdmi->i2c) i2c_del_adapter(&hdmi->i2c->adap); -- cgit v1.2.3 From dbb58bfd9ae6c885b2ca001a9a5ab8b881fb4ba9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 14 Nov 2017 11:16:47 -0800 Subject: drm/bridge: Fix lvds-encoder since the panel_bridge rework. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The panel_bridge bridge attaches to the panel's OF node, not the lvds-encoder's node. Put in a little no-op bridge of our own so that our consumers can still find a bridge where they expect. This also fixes an unintended unregistration and leak of the panel-bridge on module remove. Signed-off-by: Eric Anholt Fixes: 13dfc0540a57 ("drm/bridge: Refactor out the panel wrapper from the lvds-encoder bri dge.") Tested-by: Lothar Waßmann Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171114191647.22207-1-eric@anholt.net --- drivers/gpu/drm/bridge/lvds-encoder.c | 48 ++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/bridge/lvds-encoder.c b/drivers/gpu/drm/bridge/lvds-encoder.c index 0903ba574f61..75b0d3f6e4de 100644 --- a/drivers/gpu/drm/bridge/lvds-encoder.c +++ b/drivers/gpu/drm/bridge/lvds-encoder.c @@ -13,13 +13,37 @@ #include +struct lvds_encoder { + struct drm_bridge bridge; + struct drm_bridge *panel_bridge; +}; + +static int lvds_encoder_attach(struct drm_bridge *bridge) +{ + struct lvds_encoder *lvds_encoder = container_of(bridge, + struct lvds_encoder, + bridge); + + return drm_bridge_attach(bridge->encoder, lvds_encoder->panel_bridge, + bridge); +} + +static struct drm_bridge_funcs funcs = { + .attach = lvds_encoder_attach, +}; + static int lvds_encoder_probe(struct platform_device *pdev) { struct device_node *port; struct device_node *endpoint; struct device_node *panel_node; struct drm_panel *panel; - struct drm_bridge *bridge; + struct lvds_encoder *lvds_encoder; + + lvds_encoder = devm_kzalloc(&pdev->dev, sizeof(*lvds_encoder), + GFP_KERNEL); + if (!lvds_encoder) + return -ENOMEM; /* Locate the panel DT node. */ port = of_graph_get_port_by_id(pdev->dev.of_node, 1); @@ -49,20 +73,30 @@ static int lvds_encoder_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_LVDS); - if (IS_ERR(bridge)) - return PTR_ERR(bridge); + lvds_encoder->panel_bridge = + devm_drm_panel_bridge_add(&pdev->dev, + panel, DRM_MODE_CONNECTOR_LVDS); + if (IS_ERR(lvds_encoder->panel_bridge)) + return PTR_ERR(lvds_encoder->panel_bridge); + + /* The panel_bridge bridge is attached to the panel's of_node, + * but we need a bridge attached to our of_node for our user + * to look up. + */ + lvds_encoder->bridge.of_node = pdev->dev.of_node; + lvds_encoder->bridge.funcs = &funcs; + drm_bridge_add(&lvds_encoder->bridge); - platform_set_drvdata(pdev, bridge); + platform_set_drvdata(pdev, lvds_encoder); return 0; } static int lvds_encoder_remove(struct platform_device *pdev) { - struct drm_bridge *bridge = platform_get_drvdata(pdev); + struct lvds_encoder *lvds_encoder = platform_get_drvdata(pdev); - drm_bridge_remove(bridge); + drm_bridge_remove(&lvds_encoder->bridge); return 0; } -- cgit v1.2.3 From cffd2b16c01c3431a7a7dd62e722af33490fc436 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:19 +0300 Subject: drm/bridge: tc358767: do no fail on hi-res displays Do not fail data rates higher than 2.7 and more than 2 lanes. Try to fall back to 2.7Gbps and 2 lanes. Acked-by: Philipp Zabel Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-2-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 8571cfd877c5..e1996dbfb0a1 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -603,8 +603,15 @@ static int tc_get_display_props(struct tc_data *tc) ret = drm_dp_link_probe(&tc->aux, &tc->link.base); if (ret < 0) goto err_dpcd_read; - if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000)) - goto err_dpcd_inval; + if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) { + dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n"); + tc->link.base.rate = 270000; + } + + if (tc->link.base.num_lanes > 2) { + dev_dbg(tc->dev, "Falling to 2 lanes\n"); + tc->link.base.num_lanes = 2; + } ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp); if (ret < 0) @@ -637,9 +644,6 @@ static int tc_get_display_props(struct tc_data *tc) err_dpcd_read: dev_err(tc->dev, "failed to read DPCD: %d\n", ret); return ret; -err_dpcd_inval: - dev_err(tc->dev, "invalid DPCD\n"); - return -EINVAL; } static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) -- cgit v1.2.3 From 99fc8e963a4c0203dba26a77cf737db6081bca14 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:20 +0300 Subject: drm/bridge: tc358767: filter out too high modes Pixel clock limitation for DPI is 154 MHz. Do not accept modes with higher pixel clock rate. Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-3-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e1996dbfb0a1..e571b5e29ae7 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1103,7 +1103,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - /* Accept any mode */ + /* DPI interface clock limitation: upto 154 MHz */ + if (mode->clock > 154000) + return MODE_CLOCK_HIGH; + return MODE_OK; } -- cgit v1.2.3 From f3b8adbe1911f66fd3cab1aaa74f0f66b7ceda25 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:21 +0300 Subject: drm/bridge: tc358767: fix DP0_MISC register set Remove shift from TU_SIZE_RECOMMENDED define as it used to calculate max_tu_symbols. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-4-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e571b5e29ae7..7334cb2121a3 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -97,7 +97,7 @@ #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 #define DP0_MISC 0x0658 -#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */ +#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) #define BPC_8 (1 << 5) @@ -716,7 +716,8 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) * Must be less than tu_size. */ max_tu_symbol = TU_SIZE_RECOMMENDED - 1; - tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8); + tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | + BPC_8); return 0; err: -- cgit v1.2.3 From 66d1c3b94d5d59e4325e61a78d520f92c043d645 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:22 +0300 Subject: drm/bridge: tc358767: fix timing calculations Fields in HTIM01 and HTIM02 regs should be even. Recomended thresh_dly value is max_tu_symbol. Remove set of VPCTRL0.VSDELAY as it is related to DSI input interface. Currently driver supports only DPI. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-5-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 7334cb2121a3..b916346b933a 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -659,6 +659,14 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; + /* + * Recommended maximum number of symbols transferred in a transfer unit: + * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, + * (output active video bandwidth in bytes)) + * Must be less than tu_size. + */ + max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); dev_dbg(tc->dev, "H margin %d,%d sync %d\n", @@ -668,13 +676,18 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal); - /* LCD Ctl Frame Size */ - tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ | + /* + * LCD Ctl Frame Size + * datasheet is not clear of vsdelay in case of DPI + * assume we do not need any delay when DPI is a source of + * sync signals + */ + tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ | OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED); - tc_write(HTIM01, (left_margin << 16) | /* H back porch */ - (hsync_len << 0)); /* Hsync */ - tc_write(HTIM02, (right_margin << 16) | /* H front porch */ - (mode->hdisplay << 0)); /* width */ + tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */ + (ALIGN(hsync_len, 2) << 0)); /* Hsync */ + tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */ + (ALIGN(mode->hdisplay, 2) << 0)); /* width */ tc_write(VTIM01, (upper_margin << 16) | /* V back porch */ (vsync_len << 0)); /* Vsync */ tc_write(VTIM02, (lower_margin << 16) | /* V front porch */ @@ -693,7 +706,7 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) /* DP Main Stream Attributes */ vid_sync_dly = hsync_len + left_margin + mode->hdisplay; tc_write(DP0_VIDSYNCDELAY, - (0x003e << 16) | /* thresh_dly */ + (max_tu_symbol << 16) | /* thresh_dly */ (vid_sync_dly << 0)); tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal)); @@ -709,13 +722,6 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); - /* - * Recommended maximum number of symbols transferred in a transfer unit: - * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, - * (output active video bandwidth in bytes)) - * Must be less than tu_size. - */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | BPC_8); -- cgit v1.2.3 From 9217c1abbc145a77d65c476cf2004a3df02104c7 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:23 +0300 Subject: drm/bridge: tc358767: fix AUXDATAn registers access First four bytes should go to DP0_AUXWDATA0. Due to bug if len > 4 first four bytes was writen to DP0_AUXWDATA1 and all data get shifted by 4 bytes. Fix it. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-6-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index b916346b933a..24f495bf0567 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -318,7 +318,7 @@ static ssize_t tc_aux_transfer(struct drm_dp_aux *aux, tmp = (tmp << 8) | buf[i]; i++; if (((i % 4) == 0) || (i == size)) { - tc_write(DP0_AUXWDATA(i >> 2), tmp); + tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp); tmp = 0; } } -- cgit v1.2.3 From 4dbd6c03fbf88299c573d676838896c6e06aade2 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:24 +0300 Subject: drm/bridge: tc358767: fix 1-lane behavior Use drm_dp_channel_eq_ok helper Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-7-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 24f495bf0567..8636e7eeb731 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -819,8 +819,6 @@ static int tc_main_link_setup(struct tc_data *tc) unsigned int rate; u32 dp_phy_ctrl; int timeout; - bool aligned; - bool ready; u32 value; int ret; u8 tmp[8]; @@ -965,16 +963,15 @@ static int tc_main_link_setup(struct tc_data *tc) ret = drm_dp_dpcd_read_link_status(aux, tmp + 2); if (ret < 0) goto err_dpcd_read; - ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */ - DP_CHANNEL_EQ_BITS)); /* Lane0 */ - aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE; - } while ((--timeout) && !(ready && aligned)); + } while ((--timeout) && + !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes))); if (timeout == 0) { /* Read DPCD 0x200-0x201 */ ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2); if (ret < 0) goto err_dpcd_read; + dev_err(dev, "channel(s) EQ not ok\n"); dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]); dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n", tmp[1]); @@ -985,10 +982,6 @@ static int tc_main_link_setup(struct tc_data *tc) dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n", tmp[6]); - if (!ready) - dev_err(dev, "Lane0/1 not ready\n"); - if (!aligned) - dev_err(dev, "Lane0/1 not aligned\n"); return -EAGAIN; } -- cgit v1.2.3 From fd50fbb6bfdea5daa4ae4dd7b7082485ac44bdf5 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 27 Nov 2017 11:12:33 +0200 Subject: drm/i915: Disable THP until we have a GPU read BW W/A We seem to be missing some W/A for 2M pages and are getting a hit on raw GPU read bandwidths (even 30%) even though the GPU write bandwidths improve (even 10%). For now, disable THP, which is our only practical source of 2M pages until we have a W/A for the issue. v2: - Be explicit that we talk about GPU bandwidths (Eero) - s/deny/never/ because that's why (Chris) Reported-by: Valtteri Rantala Fixes: b901bb89324a ("drm/i915/gemfs: enable THP") Signed-off-by: Joonas Lahtinen Cc: Matthew Auld Cc: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Valtteri Rantala Cc: Eero Tamminen Reviewed-by: Chris Wilson Reviewed-by: Matthew Auld Tested-by: Valtteri Rantala Link: https://patchwork.freedesktop.org/patch/msgid/20171127091233.7001-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 9987da4b5dcfc8b94b702d4bb94b30955eb73c75) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gemfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c index e2993857df37..888b7d3f04c3 100644 --- a/drivers/gpu/drm/i915/i915_gemfs.c +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -52,7 +52,8 @@ int i915_gemfs_init(struct drm_i915_private *i915) if (has_transparent_hugepage()) { struct super_block *sb = gemfs->mnt_sb; - char options[] = "huge=within_size"; + /* FIXME: Disabled until we get W/A for read BW issue. */ + char options[] = "huge=never"; int flags = 0; int err; -- cgit v1.2.3 From 8677b1ac2db021ab30bb1fa34f1e56ebe0051ec3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Sep 2017 08:01:03 +0200 Subject: drm/omap: Fix error handling path in 'omap_dmm_probe()' If we don't find a matching device node, we must free the memory allocated in 'omap_dmm' a few lines above. Fixes: 7cb0d6c17b96 ("drm/omap: fix TILER on OMAP5") Signed-off-by: Christophe JAILLET Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 1dd3dafc59af..c60a85e82c6d 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -638,7 +638,8 @@ static int omap_dmm_probe(struct platform_device *dev) match = of_match_node(dmm_of_match, dev->dev.of_node); if (!match) { dev_err(&dev->dev, "failed to find matching device node\n"); - return -ENODEV; + ret = -ENODEV; + goto fail; } omap_dmm->plat_data = match->data; -- cgit v1.2.3 From 499ec0ed5eb2f6a7fcaab2dd66ffc5993484bda9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Oct 2017 14:06:41 +0200 Subject: drm/omap: displays: panel-dpi: add backlight dependency The new backlight code causes a link failure when backlight support itself is disabled: drivers/gpu/drm/omapdrm/displays/panel-dpi.o: In function `panel_dpi_probe_of': panel-dpi.c:(.text+0x35c): undefined reference to `of_find_backlight_by_node' This adds a Kconfig dependency like we have for the other OMAP display targets. Fixes: 39135a305a0f ("drm/omap: displays: panel-dpi: Support for handling backlight devices") Signed-off-by: Arnd Bergmann Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/displays/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/omapdrm/displays/Kconfig b/drivers/gpu/drm/omapdrm/displays/Kconfig index c226da145fb3..a349cb61961e 100644 --- a/drivers/gpu/drm/omapdrm/displays/Kconfig +++ b/drivers/gpu/drm/omapdrm/displays/Kconfig @@ -35,6 +35,7 @@ config DRM_OMAP_CONNECTOR_ANALOG_TV config DRM_OMAP_PANEL_DPI tristate "Generic DPI panel" + depends on BACKLIGHT_CLASS_DEVICE help Driver for generic DPI panels. -- cgit v1.2.3 From 23970e150a0a49f9a966c46e5d22fed06226098f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 20 Nov 2017 11:51:40 +0200 Subject: omapdrm: hdmi4: Correct the SoC revision matching I believe the intention of the commit 2c9fc9bf45f8 ("drm: omapdrm: Move FEAT_HDMI_* features to hdmi4 driver") was to identify omap4430 ES1.x, omap4430 ES2.x and other OMAP4 revisions, like omap4460. By using family=OMAP4 in the match the code will treat omap4460 ES1.x in a same way as it would treat omap4430 ES1.x This breaks HDMI audio on OMAP4460 devices (PandaES for example). Correct the match rule so we are not going to get false positive match. Fixes: 2c9fc9bf45f8 ("drm: omapdrm: Move FEAT_HDMI_* features to hdmi4 driver") Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Peter Ujfalusi Reviewed-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4_core.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c index 62e451162d96..b06f9956e733 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c @@ -886,25 +886,36 @@ struct hdmi4_features { bool audio_use_mclk; }; -static const struct hdmi4_features hdmi4_es1_features = { +static const struct hdmi4_features hdmi4430_es1_features = { .cts_swmode = false, .audio_use_mclk = false, }; -static const struct hdmi4_features hdmi4_es2_features = { +static const struct hdmi4_features hdmi4430_es2_features = { .cts_swmode = true, .audio_use_mclk = false, }; -static const struct hdmi4_features hdmi4_es3_features = { +static const struct hdmi4_features hdmi4_features = { .cts_swmode = true, .audio_use_mclk = true, }; static const struct soc_device_attribute hdmi4_soc_devices[] = { - { .family = "OMAP4", .revision = "ES1.?", .data = &hdmi4_es1_features }, - { .family = "OMAP4", .revision = "ES2.?", .data = &hdmi4_es2_features }, - { .family = "OMAP4", .data = &hdmi4_es3_features }, + { + .machine = "OMAP4430", + .revision = "ES1.?", + .data = &hdmi4430_es1_features, + }, + { + .machine = "OMAP4430", + .revision = "ES2.?", + .data = &hdmi4430_es2_features, + }, + { + .family = "OMAP4", + .data = &hdmi4_features, + }, { /* sentinel */ } }; -- cgit v1.2.3 From bf25dac38f71d392a31ec074f55cbc941f1eaf1d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 16 Nov 2017 09:50:19 +0100 Subject: drm: omapdrm: Fix DPI on platforms using the DSI VDDS Commit d178e034d565 ("drm: omapdrm: Move FEAT_DPI_USES_VDDS_DSI feature to dpi code") replaced usage of platform data version with SoC matching to configure DPI VDDS. The SoC match entries were incorrect, they should have matched on the machine name instead of the SoC family. Fix it. The result was observed on OpenPandora with OMAP3530 where the panel only had the Blue channel and Red&Green were missing. It was not observed on GTA04 with DM3730. Fixes: d178e034d565 ("drm: omapdrm: Move FEAT_DPI_USES_VDDS_DSI feature to dpi code") Signed-off-by: Laurent Pinchart Reported-by: H. Nikolaus Schaller Tested-by: H. Nikolaus Schaller Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/dpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c index daf286fc8a40..ca1e3b489540 100644 --- a/drivers/gpu/drm/omapdrm/dss/dpi.c +++ b/drivers/gpu/drm/omapdrm/dss/dpi.c @@ -566,8 +566,8 @@ static int dpi_verify_pll(struct dss_pll *pll) } static const struct soc_device_attribute dpi_soc_devices[] = { - { .family = "OMAP3[456]*" }, - { .family = "[AD]M37*" }, + { .machine = "OMAP3[456]*" }, + { .machine = "[AD]M37*" }, { /* sentinel */ } }; -- cgit v1.2.3 From bc2aba905211a8fc6da367e04595d1624f0fa156 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2017 09:27:05 +0300 Subject: omapdrm: hdmi4_cec: signedness bug in hdmi4_cec_init() "ret" needs to be signed for the error handling to work. Fixes: 8d7f934df8d8 ("omapdrm: hdmi4_cec: add OMAP4 HDMI CEC support") Signed-off-by: Dan Carpenter Reviewed-by: Sebastian Reichel Acked-by: Hans Verkuil Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index d86873f2abe6..e626eddf24d5 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -352,7 +352,7 @@ int hdmi4_cec_init(struct platform_device *pdev, struct hdmi_core_data *core, { const u32 caps = CEC_CAP_TRANSMIT | CEC_CAP_LOG_ADDRS | CEC_CAP_PASSTHROUGH | CEC_CAP_RC; - unsigned int ret; + int ret; core->adap = cec_allocate_adapter(&hdmi_cec_adap_ops, core, "omap4", caps, CEC_MAX_LOG_ADDRS); -- cgit v1.2.3 From a154f8e399a063137fc42b961f437248d55ece29 Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Thu, 30 Nov 2017 10:49:46 +0100 Subject: net: mvpp2: allocate zeroed tx descriptors Reserved and unused fields in the Tx descriptors should be 0. The PPv2 driver doesn't clear them at run-time (for performance reasons) but these descriptors aren't zeroed when allocated, which can lead to unpredictable behaviors. This patch fixes this by using dma_zalloc_coherent instead of dma_alloc_coherent. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Yan Markman [Antoine: commit message] Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index d83a78be98a2..fed2b2f909fc 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5598,7 +5598,7 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev, u32 txq_dma; /* Allocate memory for TX descriptors */ - aggr_txq->descs = dma_alloc_coherent(&pdev->dev, + aggr_txq->descs = dma_zalloc_coherent(&pdev->dev, MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, &aggr_txq->descs_dma, GFP_KERNEL); if (!aggr_txq->descs) -- cgit v1.2.3 From f8821f96ae97260d68228fe53f81848b2ede44d7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Nov 2017 14:33:56 +0100 Subject: skbuff: Grammar s/are can/can/, s/change/changes/ Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bc486ef23f20..a38c80e9f91e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1406,8 +1406,7 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb) } /* - * If users == 1, we are the only owner and are can avoid redundant - * atomic change. + * If users == 1, we are the only owner and can avoid redundant atomic changes. */ /** -- cgit v1.2.3 From a158531f3c92467df0e93e000d58185acae78a6e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 21 Nov 2017 15:18:10 +0100 Subject: gpio: 74x164: Fix crash during .remove() Commit 7ebc194d0fd4bb0f ("gpio: 74x164: Introduce 'enable-gpios' property") added a new member gpiod_oe to the end of the struct gen_74x164_chip, after the zero-length buffer array. However, this buffer is a flexible array, allocated together with the structure during .probe(). As the buffer is no longer the last member, writing to it corrupts the newly added member after it. During device removal, the corrupted member will be used as a pointer, leading to a crash. This went unnoticed, as the flexible array was declared as "buffer[0]" instead of "buffer[]", and thus did not trigger a "flexible array member not at end of struct" error from gcc. Move the gpiod_oe field up to fix this, and drop the zero from the array size to prevent future similar bugs. Fixes: 7ebc194d0fd4bb0f ("gpio: 74x164: Introduce 'enable-gpios' property") Signed-off-by: Geert Uytterhoeven Reviewed-by: Fabio Estevam Signed-off-by: Linus Walleij --- drivers/gpio/gpio-74x164.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 6b535ec858cc..15a1f4b348c4 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -23,6 +23,7 @@ struct gen_74x164_chip { struct gpio_chip gpio_chip; struct mutex lock; + struct gpio_desc *gpiod_oe; u32 registers; /* * Since the registers are chained, every byte sent will make @@ -31,8 +32,7 @@ struct gen_74x164_chip { * register at the end of the transfer. So, to have a logical * numbering, store the bytes in reverse order. */ - u8 buffer[0]; - struct gpio_desc *gpiod_oe; + u8 buffer[]; }; static int __gen_74x164_write_config(struct gen_74x164_chip *chip) -- cgit v1.2.3 From 793b9184047f3d7fe4fe234a46951dabfc3cfeeb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 22 Nov 2017 21:04:14 +0100 Subject: pinctrl: gemini: Fix usage of 3512 groups The pin config lookup function was still hardcoding the 3516 pin set, which is obviously wrong. Use the pointer in the state container. Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-gemini.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-gemini.c b/drivers/pinctrl/pinctrl-gemini.c index e9b83e291edf..c11b8f14d841 100644 --- a/drivers/pinctrl/pinctrl-gemini.c +++ b/drivers/pinctrl/pinctrl-gemini.c @@ -2322,7 +2322,7 @@ static const struct gemini_pin_conf *gemini_get_pin_conf(struct gemini_pmx *pmx, int i; for (i = 0; i < pmx->nconfs; i++) { - retconf = &gemini_confs_3516[i]; + retconf = &pmx->confs[i]; if (retconf->pin == pin) return retconf; } -- cgit v1.2.3 From 7f8e2a85c1b0bd440ffe87be82c79fdc319e8291 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Fri, 10 Nov 2017 16:43:17 +0530 Subject: gpio: davinci: Assign first bank regs for unbanked case As per the re-design assign the first bank regs for unbanked irq case. This was missed out in the original patch. Signed-off-by: Keerthy Fixes: b5cf3fd827d2e1 ("gpio: davinci: Redesign driver to accommodate ngpios in one gpio chip") Signed-off-by: Linus Walleij --- drivers/gpio/gpio-davinci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index f75d8443ecaf..e4b3d7db68c9 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -383,7 +383,7 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger) u32 mask; d = (struct davinci_gpio_controller *)irq_data_get_irq_handler_data(data); - g = (struct davinci_gpio_regs __iomem *)d->regs; + g = (struct davinci_gpio_regs __iomem *)d->regs[0]; mask = __gpio_mask(data->irq - d->base_irq); if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) -- cgit v1.2.3 From 1c08ac0c4bd8e9d66c4dde29bc496c3b430dd028 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 28 Nov 2017 17:48:22 +0100 Subject: net: stmmac: dwmac-sun8i: fix allwinner,leds-active-low handling The driver expect "allwinner,leds-active-low" to be in PHY node, but the binding doc expect it to be in MAC node. Since all board DT use it also in MAC node, the driver need to search allwinner,leds-active-low in MAC node. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index e5ff734d4f9b..9eb7f65d8000 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -808,8 +808,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) val, reg); if (gmac->variant->soc_has_internal_phy) { - if (of_property_read_bool(priv->plat->phy_node, - "allwinner,leds-active-low")) + if (of_property_read_bool(node, "allwinner,leds-active-low")) reg |= H3_EPHY_LED_POL; else reg &= ~H3_EPHY_LED_POL; -- cgit v1.2.3 From a152992062aa3803eeabfda84b5b844721ddf6ed Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Nov 2017 14:31:46 +0100 Subject: drm/imx: always call wait_for_flip_done in commit_tail drm_atomic_helper_wait_for_vblanks will go away in the future. The new drm_atomic_helper_setup_commit in 4.15 expects that blocking commits have completed flipping before the commit_tail returns. This must be ensured by calling wait_for_vblanks or wait_for_flip_done, where flip_done might do a less agressive wait, which is fine for imx-drm. Fixes: 080de2e5be2d (drm/atomic: Check for busy planes/connectors before setting the commit) Signed-off-by: Lucas Stach Reviewed-by: Daniel Vetter Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 93c7e3f9b4a8..17d2f3a1c562 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -133,9 +133,16 @@ static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state) plane_disabling = true; } - if (plane_disabling) { - drm_atomic_helper_wait_for_vblanks(dev, state); + /* + * The flip done wait is only strictly required by imx-drm if a deferred + * plane disable is in-flight. As the core requires blocking commits + * to wait for the flip it is done here unconditionally. This keeps the + * workitem around a bit longer than required for the majority of + * non-blocking commits, but we accept that for the sake of simplicity. + */ + drm_atomic_helper_wait_for_flip_done(dev, state); + if (plane_disabling) { for_each_old_plane_in_state(state, plane, old_plane_state, i) ipu_plane_disable_deferred(plane); -- cgit v1.2.3 From 90a6ec85351b31449c2c6b5406b5396ac96f191d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 Nov 2017 16:07:51 -0800 Subject: act_sample: get rid of tcf_sample_cleanup_rcu() Similar to commit d7fb60b9cafb ("net_sched: get rid of tcfa_rcu"), TC actions don't need to respect RCU grace period, because it is either just detached from tc filter (standalone case) or it is removed together with tc filter (bound case) in which case RCU grace period is already respected at filter layer. Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Reported-by: Eric Dumazet Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Yotam Gigi Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tc_act/tc_sample.h | 1 - net/sched/act_sample.c | 14 +++----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 524cee4f4c81..01dbfea32672 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -14,7 +14,6 @@ struct tcf_sample { struct psample_group __rcu *psample_group; u32 psample_group_num; struct list_head tcfm_list; - struct rcu_head rcu; }; #define to_sample(a) ((struct tcf_sample *)a) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 8b5abcd2f32f..9438969290a6 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return ret; } -static void tcf_sample_cleanup_rcu(struct rcu_head *rcu) +static void tcf_sample_cleanup(struct tc_action *a, int bind) { - struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu); + struct tcf_sample *s = to_sample(a); struct psample_group *psample_group; - psample_group = rcu_dereference_protected(s->psample_group, 1); + psample_group = rtnl_dereference(s->psample_group); RCU_INIT_POINTER(s->psample_group, NULL); psample_group_put(psample_group); } -static void tcf_sample_cleanup(struct tc_action *a, int bind) -{ - struct tcf_sample *s = to_sample(a); - - call_rcu(&s->rcu, tcf_sample_cleanup_rcu); -} - static bool tcf_sample_dev_ok_push(struct net_device *dev) { switch (dev->type) { @@ -264,7 +257,6 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { - rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } -- cgit v1.2.3 From 3016dad75b48279e579117ee3ed566ba90a3b023 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Nov 2017 17:43:57 -0800 Subject: tcp: remove buggy call to tcp_v6_restore_cb() tcp_v6_send_reset() expects to receive an skb with skb->cb[] layout as used in TCP stack. MD5 lookup uses tcp_v6_iif() and tcp_v6_sdif() and thus TCP_SKB_CB(skb)->header.h6 This patch probably fixes RST packets sent on behalf of a timewait md5 ipv6 socket. Before Florian patch, tcp_v6_restore_cb() was needed before jumping to no_tcp_socket label. Fixes: 271c3b9b7bda ("tcp: honour SO_BINDTODEVICE for TW_RST case too") Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6bb98c93edfe..be11dc13aa70 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1590,7 +1590,6 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v6_restore_cb(skb); tcp_v6_send_reset(sk, skb); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; -- cgit v1.2.3 From f859b4af1c52493ec21173ccc73d0b60029b5b88 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Nov 2017 10:41:14 +0800 Subject: sit: update frag_off info After parsing the sit netlink change info, we forget to update frag_off in ipip6_tunnel_update(). Fix it by assigning frag_off with new value. Reported-by: Jianlin Shi Signed-off-by: Hangbin Liu Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d60ddcb0bfe2..d7dc23c1b2ca 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; if (t->parms.link != p->link || t->fwmark != fwmark) { t->parms.link = p->link; t->fwmark = fwmark; -- cgit v1.2.3 From 6ad4cc8d1ac483e0fd33f605fb2788b0ecf51ed4 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 25 Nov 2017 12:19:48 +0000 Subject: pinctrl: sunxi: Fix A80 interrupt pin bank On the A80 the pins on port B can trigger interrupts, and those are assigned to the second interrupt bank. Having two pins assigned to the same interrupt bank/pin combination does not look healthy (instead more like a copy&paste bug from pins PA14-PA16), so fix the interrupt bank for pins PB14-PB16, which is actually 1. I don't have any A80 board, so could not test this. Fixes: d5e9fb31baa2 ("pinctrl: sunxi: Add A80 pinctrl muxing options") Signed-off-by: Andre Przywara Acked-by: Chen-Yu Tsai Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c b/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c index 472ef0d91b99..5553c0eb0f41 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c @@ -145,19 +145,19 @@ static const struct sunxi_desc_pin sun9i_a80_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x3, "mcsi"), /* MCLK */ - SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)), /* PB_EINT14 */ + SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 14)), /* PB_EINT14 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 15), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x3, "mcsi"), /* SCK */ SUNXI_FUNCTION(0x4, "i2c4"), /* SCK */ - SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 15)), /* PB_EINT15 */ + SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 15)), /* PB_EINT15 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 16), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x3, "mcsi"), /* SDA */ SUNXI_FUNCTION(0x4, "i2c4"), /* SDA */ - SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 16)), /* PB_EINT16 */ + SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 16)), /* PB_EINT16 */ /* Hole */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 0), -- cgit v1.2.3 From 7c5c2c2d18d778e51fd8b899965097168306031c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 25 Nov 2017 12:12:30 +0000 Subject: pinctrl: sunxi: Fix A64 UART mux value To use pin PF4 as the RX signal of UART0, we have to write 0b011 into the respective pin controller register. Fix the wrong value we had in our table so far. Fixes: 96851d391d02 ("drivers: pinctrl: add driver for Allwinner A64 SoC") Signed-off-by: Andre Przywara Acked-by: Chen-Yu Tsai Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c index 4f2a726bbaeb..f5f77432ce6f 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c @@ -428,7 +428,7 @@ static const struct sunxi_desc_pin a64_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x2, "mmc0"), /* D3 */ - SUNXI_FUNCTION(0x4, "uart0")), /* RX */ + SUNXI_FUNCTION(0x3, "uart0")), /* RX */ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 5), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), -- cgit v1.2.3 From 07c43a382d7de3db01cc28bf2e17ed151cde2046 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 25 Nov 2017 12:02:00 +0000 Subject: pinctrl: sunxi: Disable strict mode for H5 driver All of the H5 boards in the kernel reference the MMC0 CD pin twice in their DT, so strict mode will make the MMC driver fail to load. To keep existing DTs working, disable strict mode in the H5 driver. Signed-off-by: Andre Przywara Reported-by: Chris Obbard Acked-by: Maxime Ripard Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c index 97b48336f84a..a78d7b922ef4 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c @@ -535,14 +535,16 @@ static const struct sunxi_pinctrl_desc sun50i_h5_pinctrl_data_broken = { .pins = sun50i_h5_pins, .npins = ARRAY_SIZE(sun50i_h5_pins), .irq_banks = 2, - .irq_read_needs_mux = true + .irq_read_needs_mux = true, + .disable_strict_mode = true, }; static const struct sunxi_pinctrl_desc sun50i_h5_pinctrl_data = { .pins = sun50i_h5_pins, .npins = ARRAY_SIZE(sun50i_h5_pins), .irq_banks = 3, - .irq_read_needs_mux = true + .irq_read_needs_mux = true, + .disable_strict_mode = true, }; static int sun50i_h5_pinctrl_probe(struct platform_device *pdev) -- cgit v1.2.3 From 770b2edece42fa55bbe7d4cbe53347a07b8968d4 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Thu, 30 Nov 2017 10:22:40 +0800 Subject: usbip: Fix USB device hang due to wrong enabling of scatter-gather The previous USB3 SuperSpeed enabling patches mistakenly enabled URB scatter-gather chaining, which is actually not supported by the VHCI HCD. This patch fixes that. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=197867 Fixes: 03cd00d538a6feb ("usbip: vhci-hcd: Set the vhci structure up to work") Reported-by: Juan Zea Signed-off-by: Yuyang Du Cc: stable Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 713e94170963..6b3278c4b72a 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -1098,7 +1098,6 @@ static int hcd_name_to_id(const char *name) static int vhci_setup(struct usb_hcd *hcd) { struct vhci *vhci = *((void **)dev_get_platdata(hcd->self.controller)); - hcd->self.sg_tablesize = ~0; if (usb_hcd_is_primary_hcd(hcd)) { vhci->vhci_hcd_hs = hcd_to_vhci_hcd(hcd); vhci->vhci_hcd_hs->vhci = vhci; -- cgit v1.2.3 From 1ac7c8a78be85f84b019d3d2742d1a9f07255cc5 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 29 Nov 2017 15:24:22 -0700 Subject: usbip: fix usbip attach to find a port that matches the requested speed usbip attach fails to find a free port when the device on the first port is a USB_SPEED_SUPER device and non-super speed device is being attached. It keeps checking the first port and returns without a match getting stuck in a loop. Fix it check to find the first port with matching speed. Reported-by: Juan Zea Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/vhci_driver.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index 5727dfb15a83..8a1cd1616de4 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -329,9 +329,17 @@ err: int usbip_vhci_get_free_port(uint32_t speed) { for (int i = 0; i < vhci_driver->nports; i++) { - if (speed == USB_SPEED_SUPER && - vhci_driver->idev[i].hub != HUB_SPEED_SUPER) - continue; + + switch (speed) { + case USB_SPEED_SUPER: + if (vhci_driver->idev[i].hub != HUB_SPEED_SUPER) + continue; + break; + default: + if (vhci_driver->idev[i].hub != HUB_SPEED_HIGH) + continue; + break; + } if (vhci_driver->idev[i].status == VDEV_ST_NULL) return vhci_driver->idev[i].port; -- cgit v1.2.3 From 22a6c83777ac7c17d6c63891beeeac24cf5da450 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:17 -0800 Subject: xfs: ubsan fixes Fix some complaints from the UBSAN about signed integer addition overflows. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a3eeaba156c5..b0cccf8a81a8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1265,7 +1265,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); -- cgit v1.2.3 From 2d5f4b5bebccfe983715ebc9255151e611234643 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:22 -0800 Subject: xfs: remove unused parameter from xfs_writepage_map The first thing that xfs_writepage_map does is clobber the offset parameter. Since we never use the passed-in value, turn the parameter into a local variable. This gets rid of an UBSAN warning in generic/466. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index b0cccf8a81a8..21e2d70884e1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -896,13 +896,13 @@ xfs_writepage_map( struct writeback_control *wbc, struct inode *inode, struct page *page, - loff_t offset, - uint64_t end_offset) + uint64_t end_offset) { LIST_HEAD(submit_list); struct xfs_ioend *ioend, *next; struct buffer_head *bh, *head; ssize_t len = i_blocksize(inode); + uint64_t offset; int error = 0; int count = 0; int uptodate = 1; @@ -1146,7 +1146,7 @@ xfs_do_writepage( end_offset = offset; } - return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); + return xfs_writepage_map(wpc, wbc, inode, page, end_offset); redirty: redirty_page_for_writepage(wbc, page); -- cgit v1.2.3 From 3b42d385753c22b29d259ccb9d4c3f419e583b30 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 21:40:19 -0800 Subject: xfs: scrub inode mode properly Since we've used up all the bits in i_mode, the existing mode check doesn't actually do anything useful. However, we've not used all the bit values in the format portion of i_mode, so we /do/ need to test that for bad values. Fixes: 80e4e1268 ("xfs: scrub inodes") Fixes-coverity-id: 1423992 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 637b7a892313..f120fb20452f 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -318,8 +318,20 @@ xfs_scrub_dinode( /* di_mode */ mode = be16_to_cpu(dip->di_mode); - if (mode & ~(S_IALLUGO | S_IFMT)) + switch (mode & S_IFMT) { + case S_IFLNK: + case S_IFREG: + case S_IFDIR: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + /* mode is recognized */ + break; + default: xfs_scrub_ino_set_corrupt(sc, ino, bp); + break; + } /* v1/v2 fields */ switch (dip->di_version) { -- cgit v1.2.3 From 373b0589dc8d58bc09c9a28d03611ae4fb216057 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 28 Nov 2017 08:54:10 -0800 Subject: xfs: Properly retry failed dquot items in case of error during buffer writeback Once the inode item writeback errors is already fixed, it's time to fix the same problem in dquot code. Although there were no reports of users hitting this bug in dquot code (at least none I've seen), the bug is there and I was already planning to fix it when the correct approach to fix the inodes part was decided. This patch aims to fix the same problem in dquot code, regarding failed buffers being unable to be resubmitted once they are flush locked. Tested with the recently test-case sent to fstests list by Hou Tao. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_dquot.c | 14 +++++++++++--- fs/xfs/xfs_dquot_item.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index d57c2db64e59..f248708c10ff 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -970,14 +970,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064..664dea105e76 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* -- cgit v1.2.3 From eb5b46faa693470681ec7c28cc2436edd1571198 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 30 Nov 2017 07:21:33 -0500 Subject: SUNRPC: Handle ENETDOWN errors Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 5 +++++ net/sunrpc/xprtsock.c | 1 + 2 files changed, 6 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a801da812f86..e2a4184f3c5d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1841,6 +1841,7 @@ call_bind_status(struct rpc_task *task) case -ECONNABORTED: case -ENOTCONN: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -ENOBUFS: @@ -1917,6 +1918,7 @@ call_connect_status(struct rpc_task *task) /* fall through */ case -ECONNRESET: case -ECONNABORTED: + case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: case -EADDRINUSE: @@ -2022,6 +2024,7 @@ call_transmit_status(struct rpc_task *task) */ case -ECONNREFUSED: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: @@ -2071,6 +2074,7 @@ call_bc_transmit(struct rpc_task *task) switch (task->tk_status) { case 0: /* Success */ + case -ENETDOWN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: @@ -2139,6 +2143,7 @@ call_status(struct rpc_task *task) task->tk_status = 0; switch(status) { case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c2b2d489b57b..7fa94abfd6b2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2439,6 +2439,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) */ case -ECONNREFUSED: case -ECONNRESET: + case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: case -EADDRINUSE: -- cgit v1.2.3 From 5ddf755e4439833847a21bd5e2dc82d686679911 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:12 -0800 Subject: RISC-V: use generic serial.h Fixes this from allmodconfig: drivers/tty/serial/earlycon.c:27:10: fatal error: asm/serial.h: No such file or directory Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 18158be62a2b..970460a0b492 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -40,6 +40,7 @@ generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h generic-y += sembuf.h +generic-y += serial.h generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h -- cgit v1.2.3 From 5e6f82b0fe7b7b4a204efeb0817fb8b0a2bc0373 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:13 -0800 Subject: RISC-V: use RISCV_{INT,SHORT} instead of {INT,SHORT} for asm macros INT and SHORT are used by some drivers that pull in the include files, so prefixing helps avoid namespace conflicts. Other constructs in the same file already uses this. Fixes, among others, these warnings with allmodconfig: ../sound/core/pcm_misc.c:43:0: warning: "INT" redefined #define INT __force int Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm.h | 12 ++++++------ arch/riscv/include/asm/bug.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 6cbbb6a68d76..5ad4cb622bed 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -58,17 +58,17 @@ #endif #if (__SIZEOF_INT__ == 4) -#define INT __ASM_STR(.word) -#define SZINT __ASM_STR(4) -#define LGINT __ASM_STR(2) +#define RISCV_INT __ASM_STR(.word) +#define RISCV_SZINT __ASM_STR(4) +#define RISCV_LGINT __ASM_STR(2) #else #error "Unexpected __SIZEOF_INT__" #endif #if (__SIZEOF_SHORT__ == 2) -#define SHORT __ASM_STR(.half) -#define SZSHORT __ASM_STR(2) -#define LGSHORT __ASM_STR(1) +#define RISCV_SHORT __ASM_STR(.half) +#define RISCV_SZSHORT __ASM_STR(2) +#define RISCV_LGSHORT __ASM_STR(1) #else #error "Unexpected __SIZEOF_SHORT__" #endif diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index c3e13764a943..bfc7f099ab1f 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -27,8 +27,8 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS -#define __BUG_ENTRY_ADDR INT " 1b - 2b" -#define __BUG_ENTRY_FILE INT " %0 - 2b" +#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b" +#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b" #else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" #define __BUG_ENTRY_FILE RISCV_PTR " %0" @@ -38,7 +38,7 @@ typedef u32 bug_insn_t; #define __BUG_ENTRY \ __BUG_ENTRY_ADDR "\n\t" \ __BUG_ENTRY_FILE "\n\t" \ - SHORT " %1" + RISCV_SHORT " %1" #else #define __BUG_ENTRY \ __BUG_ENTRY_ADDR -- cgit v1.2.3 From fe2726af9fdc93ecf2469b7c512fc1a8936e128c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:14 -0800 Subject: RISC-V: io.h: type fixes for warnings include for __iomem definition. Also, add volatile to iounmap() like other architectures have it to avoid "discarding volatile" warnings from some drivers. Finally, explicitly promote the base address for INB/OUTB functions to avoid some old legacy drivers complaining about int-to-ptr promotions. The drivers are unlikely to work but they're included in allmodconfig so the warnings are noisy. Fixes, among other warnings, these with allmodconfig: ../arch/riscv/include/asm/io.h:24:21: error: expected '=', ',', ';', 'asm' or '__attribute__' before '*' token extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); sound/pci/echoaudio/echoaudio.c: In function 'snd_echo_free': sound/pci/echoaudio/echoaudio.c:1879:10: warning: passing argument 1 of 'iounmap' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers] Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 16 +++++++++------- arch/riscv/mm/ioremap.c | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c1f32cfcc79b..2a5c7c371e2d 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -19,6 +19,8 @@ #ifndef _ASM_RISCV_IO_H #define _ASM_RISCV_IO_H +#include + #ifdef CONFIG_MMU extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); @@ -32,7 +34,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); #define ioremap_wc(addr, size) ioremap((addr), (size)) #define ioremap_wt(addr, size) ioremap((addr), (size)) -extern void iounmap(void __iomem *addr); +extern void iounmap(volatile void __iomem *addr); #endif /* CONFIG_MMU */ @@ -266,9 +268,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar()) __io_reads_ins(ins, u8, b, __io_pbr(), __io_par()) __io_reads_ins(ins, u16, w, __io_pbr(), __io_par()) __io_reads_ins(ins, u32, l, __io_pbr(), __io_par()) -#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count) -#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count) -#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count) +#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) +#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) +#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) __io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) __io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) @@ -280,9 +282,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw()) __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) -#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count) -#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count) -#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count) +#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count) +#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count) +#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) #ifdef CONFIG_64BIT __io_reads_ins(reads, u64, q, __io_br(), __io_ar()) diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c index e99194a4077e..70ef2724cdf6 100644 --- a/arch/riscv/mm/ioremap.c +++ b/arch/riscv/mm/ioremap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(ioremap); * * Caller must ensure there is only one unmapping for the same pointer. */ -void iounmap(void __iomem *addr) +void iounmap(volatile void __iomem *addr) { vunmap((void *)((unsigned long)addr & PAGE_MASK)); } -- cgit v1.2.3 From 83e7b8769a08987f47117f3a065de153c839a0a8 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:15 -0800 Subject: RISC-V: move empty_zero_page definition to C and export it Needed by some modules (exported by other architectures). Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 3 --- arch/riscv/kernel/setup.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 76af908f87c1..78f670d70133 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -152,6 +152,3 @@ END(_start) __PAGE_ALIGNED_BSS /* Empty zero page */ .balign PAGE_SIZE -ENTRY(empty_zero_page) - .fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00 -END(empty_zero_page) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index de7db114c315..4375375ad168 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -60,6 +60,9 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; unsigned long va_pa_offset; unsigned long pfn_base; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + /* The lucky hart to first increment this variable will boot the other cores */ atomic_t hart_lottery; -- cgit v1.2.3 From 24948b7ec0f31d36dc900088b140c4f9551b6f56 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:16 -0800 Subject: RISC-V: Export some expected symbols for modules These are the ones needed by current allmodconfig, so add them instead of everything other architectures are exporting -- the rest can be added on demand later if needed. Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/riscv_ksyms.c | 3 +++ arch/riscv/kernel/setup.c | 2 ++ arch/riscv/lib/delay.c | 1 + 3 files changed, 6 insertions(+) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 23cc81ec9e94..551734248748 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,4 +12,7 @@ /* * Assembly functions that may be used (directly or indirectly) by modules */ +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4375375ad168..8fbb6749910d 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -58,7 +58,9 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; #endif /* CONFIG_CMDLINE_BOOL */ unsigned long va_pa_offset; +EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index 1cc4ac3964b4..dce8ae24c6d3 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -84,6 +84,7 @@ void __delay(unsigned long cycles) while ((unsigned long)(get_cycles() - t0) < cycles) cpu_relax(); } +EXPORT_SYMBOL(__delay); void udelay(unsigned long usecs) { -- cgit v1.2.3 From 4bde63286a6c7c76fe05ff0e03ad253f5260b104 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:17 -0800 Subject: RISC-V: Provide stub of setup_profiling_timer() Fixes the following on allmodconfig build: profile.c:(.text+0x3e4): undefined reference to `setup_profiling_timer' Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f..17014c68e9fe 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -38,6 +38,13 @@ enum ipi_message_type { IPI_MAX }; + +/* Unsupported */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + irqreturn_t handle_ipi(void) { unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; -- cgit v1.2.3 From 4a41d5dbb0bbd0c3faffb2ccd8ef1a7aeb12f978 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:18 -0800 Subject: RISC-V: Use define for get_cycles like other architectures Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/timex.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 3df4932d8964..2f26989cb864 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -18,7 +18,7 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles(void) +static inline cycles_t get_cycles_inline(void) { cycles_t n; @@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void) : "=r" (n)); return n; } +#define get_cycles get_cycles_inline #ifdef CONFIG_64BIT static inline uint64_t get_cycles64(void) -- cgit v1.2.3 From 741fc3ff3a49509a7092d6d9eb51da6bd7577278 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:20 -0800 Subject: RISC-V: Add missing include Fixes: include/asm-generic/mm_hooks.h:20:11: warning: 'struct vm_area_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/mm_hooks.h:19:38: warning: 'struct mm_struct' declared inside parameter list will not be visible outside of this definition or declaration Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc4..1cd5172882c2 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -14,6 +14,7 @@ #ifndef _ASM_RISCV_MMU_CONTEXT_H #define _ASM_RISCV_MMU_CONTEXT_H +#include #include #include -- cgit v1.2.3 From 2b279419567105d63f1e524bb1ac34ae8f918e5d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2017 08:52:42 -0800 Subject: tools/bpf: adjust rlimit RLIMIT_MEMLOCK for test_verifier_log The default rlimit RLIMIT_MEMLOCK is 64KB. In certain cases, e.g. in a test machine mimicking our production system, this test may fail due to unable to charge the required memory for prog load: # ./test_verifier_log Test log_level 0... ERROR: Program load returned: ret:-1/errno:1, expected ret:-1/errno:22 Changing the default rlimit RLIMIT_MEMLOCK to unlimited makes the test always pass. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier_log.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c index 3cc0b561489e..e9626cf5607a 100644 --- a/tools/testing/selftests/bpf/test_verifier_log.c +++ b/tools/testing/selftests/bpf/test_verifier_log.c @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -131,11 +133,16 @@ static void test_log_bad(char *log, size_t log_len, int log_level) int main(int argc, char **argv) { + struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; char full_log[LOG_SIZE]; char log[LOG_SIZE]; size_t want_len; int i; + /* allow unlimited locked memory to have more consistent error code */ + if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) + perror("Unable to lift memlock rlimit"); + memset(log, 1, LOG_SIZE); /* Test incorrect attr */ -- cgit v1.2.3 From 6fef90c6b3f6a2b52018e66c0886944ea0c03fcc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 30 Nov 2017 10:45:26 -0800 Subject: net: dsa: bcm_sf2: Set correct CHAIN_ID and slice number mask When configuring an IPv6 address mask, we should use SLICE_NUM_MASK as the mask in order to make sure all bits are masked by the hardware. Also, we want matching entries to have a CHAIN_ID value set to the same value as the rule index we return to user-space for convenience, so fix that too. Fixes: ba0696c22e7c ("net: dsa: bcm_sf2: Add support for IPv6 CFP rules") Fixes: dd8eff68343d ("net: dsa: bcm_sf2: Allow matching arbitrary IPv6 masks/lengths") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index b721a2009b50..23b45da784cb 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -625,7 +625,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc, slice_num, false); bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6src, v6_m_spec->psrc, - slice_num, true); + SLICE_NUM_MASK, true); /* Insert into TCAM now because we need to insert a second rule */ bcm_sf2_cfp_rule_addr_set(priv, rule_index[0]); @@ -699,7 +699,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Insert into Action and policer RAMs now, set chain ID to * the one we are chained to */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num, queue_num, true); if (ret) goto out_err; -- cgit v1.2.3 From 08f051eda33b51e8ee0f45f05bcfe49d0f0caf6b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Oct 2017 14:30:32 -0700 Subject: RISC-V: Flush I$ when making a dirty page executable The RISC-V ISA allows for instruction caches that are not coherent WRT stores, even on a single hart. As a result, we need to explicitly flush the instruction cache whenever marking a dirty page as executable in order to preserve the correct system behavior. Local instruction caches aren't that scary (our implementations actually flush the cache, but RISC-V is defined to allow higher-performance implementations to exist), but RISC-V defines no way to perform an instruction cache shootdown. When explicitly asked to do so we can shoot down remote instruction caches via an IPI, but this is a bit on the slow side. Instead of requiring an IPI to all harts whenever marking a page as executable, we simply flush the currently running harts. In order to maintain correct behavior, we additionally mark every other hart as needing a deferred instruction cache which will be taken before anything runs on it. Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 24 ++++++++++++--- arch/riscv/include/asm/mmu.h | 4 +++ arch/riscv/include/asm/mmu_context.h | 44 +++++++++++++++++++++++++++ arch/riscv/include/asm/pgtable.h | 58 ++++++++++++++++++++---------------- arch/riscv/include/asm/tlbflush.h | 2 ++ arch/riscv/kernel/smp.c | 48 +++++++++++++++++++++++++++++ arch/riscv/mm/Makefile | 1 + arch/riscv/mm/cacheflush.c | 23 ++++++++++++++ 8 files changed, 174 insertions(+), 30 deletions(-) create mode 100644 arch/riscv/mm/cacheflush.c diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0595585013b0..5c9ed39ee2a2 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -18,21 +18,37 @@ #undef flush_icache_range #undef flush_icache_user_range +#undef flush_dcache_page static inline void local_flush_icache_all(void) { asm volatile ("fence.i" ::: "memory"); } +#define PG_dcache_clean PG_arch_1 + +static inline void flush_dcache_page(struct page *page) +{ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} + +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range(start, end) flush_icache_all() +#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all() + #ifndef CONFIG_SMP -#define flush_icache_range(start, end) local_flush_icache_all() -#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() +#define flush_icache_all() local_flush_icache_all() +#define flush_icache_mm(mm, local) flush_icache_all() #else /* CONFIG_SMP */ -#define flush_icache_range(start, end) sbi_remote_fence_i(0) -#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) +#define flush_icache_all() sbi_remote_fence_i(0) +void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 66805cba9a27..5df2dccdba12 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,10 @@ typedef struct { void *vdso; +#ifdef CONFIG_SMP + /* A local icache flush is needed before user execution can resume. */ + cpumask_t icache_stale_mask; +#endif } mm_context_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc4..b15b169e3d22 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,6 +20,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *task) @@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd) csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); } +/* + * When necessary, performs a deferred icache flush for the given MM context, + * on the local CPU. RISC-V has no direct mechanism for instruction cache + * shoot downs, so instead we send an IPI that informs the remote harts they + * need to flush their local instruction caches. To avoid pathologically slow + * behavior in a common case (a bunch of single-hart processes on a many-hart + * machine, ie 'make -j') we avoid the IPIs for harts that are not currently + * executing a MM context and instead schedule a deferred local instruction + * cache flush to be performed before execution resumes on each hart. This + * actually performs that local instruction cache flush, which implicitly only + * refers to the current hart. + */ +static inline void flush_icache_deferred(struct mm_struct *mm) +{ +#ifdef CONFIG_SMP + unsigned int cpu = smp_processor_id(); + cpumask_t *mask = &mm->context.icache_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + /* + * Ensure the remote hart's writes are visible to this hart. + * This pairs with a barrier in flush_icache_mm. + */ + smp_mb(); + local_flush_icache_all(); + } +#endif +} + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *task) { if (likely(prev != next)) { + /* + * Mark the current MM context as inactive, and the next as + * active. This is at least used by the icache flushing + * routines in order to determine who should + */ + unsigned int cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + set_pgdir(next->pgd); local_flush_tlb_all(); + + flush_icache_deferred(next); } } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 3399257780b2..2cbd92ed1629 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) #define pte_unmap(pte) ((void)(pte)) -/* - * Certain architectures need to do special things when PTEs within - * a page table are directly modified. Thus, the following hook is - * made available. - */ -static inline void set_pte(pte_t *ptep, pte_t pteval) -{ - *ptep = pteval; -} - -static inline void set_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pteval) -{ - set_pte(ptep, pteval); -} - -static inline void pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - set_pte_at(mm, addr, ptep, __pte(0)); -} - static inline int pte_present(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT); @@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte) return (pte_val(pte) == 0); } -/* static inline int pte_read(pte_t pte) */ - static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXEC; +} + static inline int pte_huge(pte_t pte) { return pte_present(pte) && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); } -/* static inline int pte_exec(pte_t pte) */ - static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; @@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) return pte_val(pte_a) == pte_val(pte_b); } +/* + * Certain architectures need to do special things when PTEs within + * a page table are directly modified. Thus, the following hook is + * made available. + */ +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +void flush_icache_pte(pte_t pte); + +static inline void set_pte_at(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pteval) +{ + if (pte_present(pteval) && pte_exec(pteval)) + flush_icache_pte(pteval); + + set_pte(ptep, pteval); +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + set_pte_at(mm, addr, ptep, __pte(0)); +} + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 5ee4ae370b5e..77edf2826c1f 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,6 +17,8 @@ #ifdef CONFIG_MMU +#include + /* Flush entire local TLB */ static inline void local_flush_tlb_all(void) { diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f..1b27ade437b4 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu) { send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); } + +/* + * Performs an icache flush for the given MM context. RISC-V has no direct + * mechanism for instruction cache shoot downs, so instead we send an IPI that + * informs the remote harts they need to flush their local instruction caches. + * To avoid pathologically slow behavior in a common case (a bunch of + * single-hart processes on a many-hart machine, ie 'make -j') we avoid the + * IPIs for harts that are not currently executing a MM context and instead + * schedule a deferred local instruction cache flush to be performed before + * execution resumes on each hart. + */ +void flush_icache_mm(struct mm_struct *mm, bool local) +{ + unsigned int cpu; + cpumask_t others, *mask; + + preempt_disable(); + + /* Mark every hart's icache as needing a flush for this MM. */ + mask = &mm->context.icache_stale_mask; + cpumask_setall(mask); + /* Flush this hart's I$ now, and mark it as flushed. */ + cpu = smp_processor_id(); + cpumask_clear_cpu(cpu, mask); + local_flush_icache_all(); + + /* + * Flush the I$ of other harts concurrently executing, and mark them as + * flushed. + */ + cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); + local |= cpumask_empty(&others); + if (mm != current->active_mm || !local) + sbi_remote_fence_i(others.bits); + else { + /* + * It's assumed that at least one strongly ordered operation is + * performed on this hart between setting a hart's cpumask bit + * and scheduling this MM context on that hart. Sending an SBI + * remote message will do this, but in the case where no + * messages are sent we still need to order this hart's writes + * with flush_icache_deferred(). + */ + smp_mb(); + } + + preempt_enable(); +} diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 81f7d9ce6d88..eb22ab49b3e0 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -2,3 +2,4 @@ obj-y += init.o obj-y += fault.o obj-y += extable.o obj-y += ioremap.o +obj-y += cacheflush.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c new file mode 100644 index 000000000000..498c0a0814fe --- /dev/null +++ b/arch/riscv/mm/cacheflush.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +void flush_icache_pte(pte_t pte) +{ + struct page *page = pte_page(pte); + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + flush_icache_all(); +} -- cgit v1.2.3 From 921ebd8f2c081b3cf6c3b29ef4103eef3ff26054 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Oct 2017 14:32:16 -0700 Subject: RISC-V: Allow userspace to flush the instruction cache Despite RISC-V having a direct 'fence.i' instruction available to userspace (which we can't trap!), that's not actually viable when running on Linux because the kernel might schedule a process on another hart. There is no way for userspace to handle this without invoking the kernel (as it doesn't know the thread->hart mappings), so we've defined a RISC-V specific system call to flush the instruction cache. This patch adds both a system call and a VDSO entry. If possible, we'd like to avoid having the system call be considered part of the user-facing ABI and instead restrict that to the VDSO entry -- both just in general to avoid having additional user-visible ABI to maintain, and because we'd prefer that users just call the VDSO entry because there might be a better way to do this in the future (ie, one that doesn't require entering the kernel). Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 6 ++++++ arch/riscv/include/asm/vdso-syscalls.h | 28 ++++++++++++++++++++++++++++ arch/riscv/include/asm/vdso.h | 4 ++++ arch/riscv/kernel/sys_riscv.c | 32 ++++++++++++++++++++++++++++++++ arch/riscv/kernel/syscall_table.c | 2 ++ arch/riscv/kernel/vdso/Makefile | 1 + arch/riscv/kernel/vdso/flush_icache.S | 31 +++++++++++++++++++++++++++++++ arch/riscv/kernel/vdso/vdso.lds.S | 1 + 8 files changed, 105 insertions(+) create mode 100644 arch/riscv/include/asm/vdso-syscalls.h create mode 100644 arch/riscv/kernel/vdso/flush_icache.S diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 5c9ed39ee2a2..efd89a88d2d0 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -52,4 +52,10 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * Bits in sys_riscv_flush_icache()'s flags argument. + */ +#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL +#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) + #endif /* _ASM_RISCV_CACHEFLUSH_H */ diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h new file mode 100644 index 000000000000..a2ccf1894929 --- /dev/null +++ b/arch/riscv/include/asm/vdso-syscalls.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _ASM_RISCV_VDSO_SYSCALLS_H +#define _ASM_RISCV_VDSO_SYSCALLS_H + +#ifdef CONFIG_SMP + +/* These syscalls are only used by the vDSO and are not in the uapi. */ +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +#endif + +#endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 602f61257553..541544d64c33 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -38,4 +38,8 @@ struct vdso_data { (void __user *)((unsigned long)(base) + __vdso_##name); \ }) +#ifdef CONFIG_SMP +asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); +#endif + #endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 4351be7d0533..c6c037eabaf6 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -16,6 +16,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -47,3 +48,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); } #endif /* !CONFIG_64BIT */ + +#ifdef CONFIG_SMP +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * sys_riscv_flush_icache() is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, + uintptr_t, flags) +{ + struct mm_struct *mm = current->mm; + bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; + + /* Check the reserved flags. */ + if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) + return -EINVAL; + + flush_icache_mm(mm, local); + + return 0; +} +#endif diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index 4e30dc5fb593..a5bd6401f95e 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -15,6 +15,7 @@ #include #include #include +#include #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), @@ -22,4 +23,5 @@ void *sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = sys_ni_syscall, #include +#include }; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 2dcc4f3070bc..324568d33921 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -6,6 +6,7 @@ vdso-syms += gettimeofday vdso-syms += clock_gettime vdso-syms += clock_getres vdso-syms += getcpu +vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S new file mode 100644 index 000000000000..b0fbad74e873 --- /dev/null +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include + + .text +/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ +ENTRY(__vdso_flush_icache) + .cfi_startproc +#ifdef CONFIG_SMP + li a7, __NR_riscv_flush_icache + ecall +#else + fence.i + li a0, 0 +#endif + ret + .cfi_endproc +ENDPROC(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index c7543c6a00f9..cd1d47e0724b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -74,6 +74,7 @@ VERSION __vdso_clock_gettime; __vdso_clock_getres; __vdso_getcpu; + __vdso_flush_icache; local: *; }; } -- cgit v1.2.3 From 0e710ac6521f13c68a5c634471f40ae448c31e0a Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 21 Nov 2017 07:59:28 -0800 Subject: RISC-V: Clean up an unused include We used to have some cmpxchg syscalls. They're no longer there, so we no longer need the include. CC: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index c6c037eabaf6..a2ae936a093e 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -14,7 +14,6 @@ */ #include -#include #include #include -- cgit v1.2.3 From 68615eb01f82256c19e41967bfb3eef902f77033 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 27 Nov 2017 17:31:00 +0100 Subject: hwmon: (jc42) optionally try to disable the SMBUS timeout With a nxp,se97 chip on an atmel sama5d31 board, the I2C adapter driver is not always capable of avoiding the 25-35 ms timeout as specified by the SMBUS protocol. This may cause silent corruption of the last bit of any transfer, e.g. a one is read instead of a zero if the sensor chip times out. This also affects the eeprom half of the nxp-se97 chip, where this silent corruption was originally noticed. Other I2C adapters probably suffer similar issues, e.g. bit-banging comes to mind as risky... The SMBUS register in the nxp chip is not a standard Jedec register, but it is not special to the nxp chips either, at least the atmel chips have the same mechanism. Therefore, do not special case this on the manufacturer, it is opt-in via the device property anyway. Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Peter Rosin Acked-by: Rob Herring Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/hwmon/jc42.txt | 4 ++++ drivers/hwmon/jc42.c | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/hwmon/jc42.txt b/Documentation/devicetree/bindings/hwmon/jc42.txt index 07a250498fbb..f569db58f64a 100644 --- a/Documentation/devicetree/bindings/hwmon/jc42.txt +++ b/Documentation/devicetree/bindings/hwmon/jc42.txt @@ -34,6 +34,10 @@ Required properties: - reg: I2C address +Optional properties: +- smbus-timeout-disable: When set, the smbus timeout function will be disabled. + This is not supported on all chips. + Example: temp-sensor@1a { diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 5f11dc014ed6..e5234f953a6d 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -22,6 +22,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -45,6 +46,7 @@ static const unsigned short normal_i2c[] = { #define JC42_REG_TEMP 0x05 #define JC42_REG_MANID 0x06 #define JC42_REG_DEVICEID 0x07 +#define JC42_REG_SMBUS 0x22 /* NXP and Atmel, possibly others? */ /* Status bits in temperature register */ #define JC42_ALARM_CRIT_BIT 15 @@ -75,6 +77,9 @@ static const unsigned short normal_i2c[] = { #define GT_MANID 0x1c68 /* Giantec */ #define GT_MANID2 0x132d /* Giantec, 2nd mfg ID */ +/* SMBUS register */ +#define SMBUS_STMOUT BIT(7) /* SMBus time-out, active low */ + /* Supported chips */ /* Analog Devices */ @@ -495,6 +500,22 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) data->extended = !!(cap & JC42_CAP_RANGE); + if (device_property_read_bool(dev, "smbus-timeout-disable")) { + int smbus; + + /* + * Not all chips support this register, but from a + * quick read of various datasheets no chip appears + * incompatible with the below attempt to disable + * the timeout. And the whole thing is opt-in... + */ + smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS); + if (smbus < 0) + return smbus; + i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS, + smbus | SMBUS_STMOUT); + } + config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG); if (config < 0) return config; -- cgit v1.2.3 From c8c088ba0edf65044c254b96fc438c91914aaab0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2017 13:47:54 -0800 Subject: bpf: set maximum number of attached progs to 64 for a single perf tp cgropu+bpf prog array has a maximum number of 64 programs. Let us apply the same limit here. Fixes: e87c6bc3852b ("bpf: permit multiple bpf attachments for a single perf event") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/core.c | 3 ++- kernel/trace/bpf_trace.c | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b9f8686a84cf..86b50aa26ee8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1447,7 +1447,8 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) rcu_read_lock(); prog = rcu_dereference(progs)->progs; for (; *prog; prog++) - cnt++; + if (*prog != &dummy_bpf_prog.prog) + cnt++; rcu_read_unlock(); return cnt; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 27d1f4ffa3de..0ce99c379c30 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -759,6 +759,8 @@ const struct bpf_prog_ops perf_event_prog_ops = { static DEFINE_MUTEX(bpf_event_mutex); +#define BPF_TRACE_MAX_PROGS 64 + int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) { @@ -772,6 +774,12 @@ int perf_event_attach_bpf_prog(struct perf_event *event, goto unlock; old_array = event->tp_event->prog_array; + if (old_array && + bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { + ret = -E2BIG; + goto unlock; + } + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); if (ret < 0) goto unlock; -- cgit v1.2.3 From 0ec9552b43b98deb882bf48efd347be4bd7afc9f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2017 13:47:55 -0800 Subject: samples/bpf: add error checking for perf ioctl calls in bpf loader load_bpf_file() should fail if ioctl with command PERF_EVENT_IOC_ENABLE and PERF_EVENT_IOC_SET_BPF fails. When they do fail, proper error messages are printed. With this change, the below "syscall_tp" run shows that the maximum number of bpf progs attaching to the same perf tracepoint is indeed enforced. $ ./syscall_tp -i 64 prog #0: map ids 4 5 ... prog #63: map ids 382 383 $ ./syscall_tp -i 65 prog #0: map ids 4 5 ... prog #64: map ids 388 389 ioctl PERF_EVENT_IOC_SET_BPF failed err Argument list too long Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- samples/bpf/bpf_load.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 522ca9252d6c..242631aa4ea2 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -193,8 +193,18 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } event_fd[prog_cnt - 1] = efd; - ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); - ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); + if (err < 0) { + printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n", + strerror(errno)); + return -1; + } + err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + if (err < 0) { + printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n", + strerror(errno)); + return -1; + } return 0; } -- cgit v1.2.3 From 435019b48033138581a6171093b181fc6b4d3d30 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:26 +0100 Subject: can: kvaser_usb: free buf in error paths The allocated buffer was not freed if usb_submit_urb() failed. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 9b18d96ef526..075644591498 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -813,6 +813,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, if (err) { netdev_err(netdev, "Error transmitting URB\n"); usb_unanchor_urb(urb); + kfree(buf); usb_free_urb(urb); return err; } @@ -1768,6 +1769,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); usb_unanchor_urb(urb); + kfree(buf); stats->tx_dropped++; -- cgit v1.2.3 From e84f44eb5523401faeb9cc1c97895b68e3cfb78d Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:27 +0100 Subject: can: kvaser_usb: Fix comparison bug in kvaser_usb_read_bulk_callback() The conditon in the while-loop becomes true when actual_length is less than 2 (MSG_HEADER_LEN). In best case we end up with a former, already dispatched msg, that got msg->len greater than actual_length. This will result in a "Format error" error printout. Problem seen when unplugging a Kvaser USB device connected to a vbox guest. warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 075644591498..d87e330a20b3 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1334,7 +1334,7 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) goto resubmit_urb; } - while (pos <= urb->actual_length - MSG_HEADER_LEN) { + while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) { msg = urb->transfer_buffer + pos; /* The Kvaser firmware can only read and write messages that -- cgit v1.2.3 From 8bd13bd522ff7dfa0eb371921aeb417155f7a3be Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:28 +0100 Subject: can: kvaser_usb: ratelimit errors if incomplete messages are received Avoid flooding the kernel log with "Formate error", if incomplete message are received. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index d87e330a20b3..f95945915d20 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -609,8 +609,8 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id, } if (pos + tmp->len > actual_len) { - dev_err(dev->udev->dev.parent, - "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } @@ -1353,7 +1353,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) } if (pos + msg->len > urb->actual_length) { - dev_err(dev->udev->dev.parent, "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } -- cgit v1.2.3 From f6c23b174c3c96616514827407769cbcfc8005cf Mon Sep 17 00:00:00 2001 From: Oliver Stäbler Date: Mon, 20 Nov 2017 14:45:15 +0100 Subject: can: ti_hecc: Fix napi poll return value for repoll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit d75b1ade567f ("net: less interrupt masking in NAPI") napi repoll is done only when work_done == budget. So we need to return budget if there are still packets to receive. Signed-off-by: Oliver Stäbler Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 4d4941469cfc..db6ea936dc3f 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -637,6 +637,9 @@ static int ti_hecc_rx_poll(struct napi_struct *napi, int quota) mbx_mask = hecc_read(priv, HECC_CANMIM); mbx_mask |= HECC_TX_MBOX_MASK; hecc_write(priv, HECC_CANMIM, mbx_mask); + } else { + /* repoll is done only if whole budget is used */ + num_pkts = quota; } return num_pkts; -- cgit v1.2.3 From 5c2cb02edf79ad79d9b8d07c6d52243a948c4c9f Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 23 Nov 2017 15:44:35 +0100 Subject: can: peak/pci: fix potential bug when probe() fails PCI/PCIe drivers for PEAK-System CAN/CAN-FD interfaces do some access to the PCI config during probing. In case one of these accesses fails, a POSITIVE PCIBIOS_xxx error code is returned back. This POSITIVE error code MUST be converted into a NEGATIVE errno for the probe() function to indicate it failed. Using the pcibios_err_to_errno() function, we make sure that the return code will always be negative. Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_pciefd_main.c | 5 ++++- drivers/net/can/sja1000/peak_pci.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index b4efd711f824..788c3464a3b0 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -825,7 +825,10 @@ err_release_regions: err_disable_pci: pci_disable_device(pdev); - return err; + /* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while + * the probe() function must return a negative errno in case of failure + * (err is unchanged if negative) */ + return pcibios_err_to_errno(err); } /* free the board structure object, as well as its resources: */ diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 131026fbc2d7..5adc95c922ee 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -717,7 +717,10 @@ failure_release_regions: failure_disable_pci: pci_disable_device(pdev); - return err; + /* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while + * the probe() function must return a negative errno in case of failure + * (err is unchanged if negative) */ + return pcibios_err_to_errno(err); } static void peak_pci_remove(struct pci_dev *pdev) -- cgit v1.2.3 From 658f534c036352a06584c8b2e71a547915173ba9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 22 Nov 2017 13:01:08 +0100 Subject: can: flexcan: Update IRQ Err Passive information The flexcan IP cores used on MX25 and MX35 do not generate Error Passive IRQs. Update the IP core overview table in the driver accordingly. Suggested-by: ZHU Yi (ST-FIR/ENG1-Zhu) Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index a13a4896a8bd..eefddae2e99a 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -184,9 +184,9 @@ * Below is some version info we got: * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR re- * Filter? connected? Passive detection ception in MB - * MX25 FlexCAN2 03.00.00.00 no no ? no no + * MX25 FlexCAN2 03.00.00.00 no no no no no * MX28 FlexCAN2 03.00.04.00 yes yes no no no - * MX35 FlexCAN2 03.00.00.00 no no ? no no + * MX35 FlexCAN2 03.00.00.00 no no no no no * MX53 FlexCAN2 03.00.00.00 yes no no no no * MX6s FlexCAN3 10.00.12.00 yes yes no no yes * VF610 FlexCAN3 ? no yes ? yes yes? -- cgit v1.2.3 From 29c64b17a0bc72232acc45e9533221d88a262efb Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 27 Nov 2017 09:18:21 +0100 Subject: can: flexcan: fix VF610 state transition issue Enable FLEXCAN_QUIRK_BROKEN_PERR_STATE for VF610 to report correct state transitions. Tested-by: Mirza Krak Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index eefddae2e99a..0626dcfd1f3d 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -189,7 +189,7 @@ * MX35 FlexCAN2 03.00.00.00 no no no no no * MX53 FlexCAN2 03.00.00.00 yes no no no no * MX6s FlexCAN3 10.00.12.00 yes yes no no yes - * VF610 FlexCAN3 ? no yes ? yes yes? + * VF610 FlexCAN3 ? no yes no yes yes? * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. */ @@ -297,7 +297,8 @@ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { static const struct flexcan_devtype_data fsl_vf610_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | - FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, + FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | + FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; static const struct can_bittiming_const flexcan_bittiming_const = { -- cgit v1.2.3 From 3fed8dbbc42c0639ac03d2361ab5f0606dd49e28 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Mon, 27 Nov 2017 15:49:15 -0800 Subject: can: mcba_usb: fix typo Fix typo "analizer" --> "Analyzer". Signed-off-by: Martin Kelly Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 7f0272558bef..c4355f0a20d5 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -862,7 +862,7 @@ static int mcba_usb_probe(struct usb_interface *intf, goto cleanup_unregister_candev; } - dev_info(&intf->dev, "Microchip CAN BUS analizer connected\n"); + dev_info(&intf->dev, "Microchip CAN BUS Analyzer connected\n"); return 0; -- cgit v1.2.3 From 1cb35a33a28394fd711bb26ddf3a564f4e9d9125 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Mon, 27 Nov 2017 15:49:16 -0800 Subject: can: mcba_usb: fix device disconnect bug Currently, when you disconnect the device, the driver infinitely resubmits all URBs, so you see: Rx URB aborted (-32) in an infinite loop. Fix this by catching -EPIPE (what we get in urb->status when the device disconnects) and not resubmitting. With this patch, I can plug and unplug many times and the driver recovers correctly. Signed-off-by: Martin Kelly Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index c4355f0a20d5..ef417dcddbf7 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -592,6 +592,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: case -ESHUTDOWN: return; -- cgit v1.2.3 From 1bcab12521d9b23dbaa22ac71184778dcc43e2f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:40:43 +0000 Subject: afs: Fix permit refcounting Fix four refcount bugs in afs_cache_permit(): (1) When checking the result of the kzalloc(), we can't just return, but must put 'permits'. (2) We shouldn't put permits immediately after hashing a new permit as we need to keep the pointer stable so that we can check to see if vnode->permit_cache has changed before we decide whether to assign to it. (3) 'permits' is being put twice. (4) We need to put either the replacement or the thing replaced after the assignment to vnode->permit_cache. Without this, lots of the following are seen: Kernel BUG at ffffffffa039857b [verbose debug info unavailable] ------------[ cut here ]------------ Kernel BUG at ffffffffa039858a [verbose debug info unavailable] ------------[ cut here ]------------ The addresses are in the .text..refcount section of the kafs.ko module. Following the relocation records for the __ex_table section shows one to be due to the decrement in afs_put_permits() and the other to be key_get() in afs_cache_permit(). Occasionally, the following is seen: refcount_t overflow at afs_cache_permit+0x57d/0x5c0 [kafs] in cc1[562], uid/euid: 0/0 WARNING: CPU: 0 PID: 562 at kernel/panic.c:657 refcount_error_report+0x9c/0xac ... Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/security.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 2b00097101b3..b88b7d45fdaa 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -120,7 +120,7 @@ static void afs_hash_permits(struct afs_permits *permits) void afs_cache_permit(struct afs_vnode *vnode, struct key *key, unsigned int cb_break) { - struct afs_permits *permits, *xpermits, *replacement, *new = NULL; + struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL; afs_access_t caller_access = READ_ONCE(vnode->status.caller_access); size_t size = 0; bool changed = false; @@ -204,7 +204,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, new = kzalloc(sizeof(struct afs_permits) + sizeof(struct afs_permit) * size, GFP_NOFS); if (!new) - return; + goto out_put; refcount_set(&new->usage, 1); new->nr_permits = size; @@ -229,8 +229,6 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, afs_hash_permits(new); - afs_put_permits(permits); - /* Now see if the permit list we want is actually already available */ spin_lock(&afs_permits_lock); @@ -262,11 +260,15 @@ found: kfree(new); spin_lock(&vnode->lock); - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) || - permits != rcu_access_pointer(vnode->permit_cache)) - goto someone_else_changed_it_unlock; - rcu_assign_pointer(vnode->permit_cache, replacement); + zap = rcu_access_pointer(vnode->permit_cache); + if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) && + zap == permits) + rcu_assign_pointer(vnode->permit_cache, replacement); + else + zap = replacement; spin_unlock(&vnode->lock); + afs_put_permits(zap); +out_put: afs_put_permits(permits); return; -- cgit v1.2.3 From f8de483e7440b0d23ce6372b3ef8358841c8827b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:40:43 +0000 Subject: afs: Properly reset afs_vnode (inode) fields When an AFS inode is allocated by afs_alloc_inode(), the allocated afs_vnode struct isn't necessarily reset from the last time it was used as an inode because the slab constructor is only invoked once when the memory is obtained from the page allocator. This means that information can leak from one inode to the next because we're not calling kmem_cache_zalloc(). Some of the information isn't reset, in particular the permit cache pointer. Bring the clearances up to date. Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/internal.h | 5 ++++- fs/afs/super.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e03910cebdd4..804d1f905622 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -441,7 +441,10 @@ enum afs_lock_state { }; /* - * AFS inode private data + * AFS inode private data. + * + * Note that afs_alloc_inode() *must* reset anything that could incorrectly + * leak from one inode to another. */ struct afs_vnode { struct inode vfs_inode; /* the VFS's inode record */ diff --git a/fs/afs/super.c b/fs/afs/super.c index d3f97da61bdf..1037dd41a622 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -536,7 +536,9 @@ static void afs_kill_super(struct super_block *sb) } /* - * initialise an inode cache slab element prior to any use + * Initialise an inode cache slab element prior to any use. Note that + * afs_alloc_inode() *must* reset anything that could incorrectly leak from one + * inode to another. */ static void afs_i_init_once(void *_vnode) { @@ -568,11 +570,21 @@ static struct inode *afs_alloc_inode(struct super_block *sb) atomic_inc(&afs_count_active_inodes); + /* Reset anything that shouldn't leak from one inode to the next. */ memset(&vnode->fid, 0, sizeof(vnode->fid)); memset(&vnode->status, 0, sizeof(vnode->status)); vnode->volume = NULL; + vnode->lock_key = NULL; + vnode->permit_cache = NULL; + vnode->cb_interest = NULL; +#ifdef CONFIG_AFS_FSCACHE + vnode->cache = NULL; +#endif + vnode->flags = 1 << AFS_VNODE_UNSET; + vnode->cb_type = 0; + vnode->lock_state = AFS_VNODE_LOCK_NONE; _leave(" = %p", &vnode->vfs_inode); return &vnode->vfs_inode; -- cgit v1.2.3 From 7e8b9c1d2e2f5f45db7d40b50d14f606097c25de Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Nov 2017 17:41:29 +0000 Subject: arm64: module-plts: factor out PLT generation code for ftrace To allow the ftrace trampoline code to reuse the PLT entry routines, factor it out and move it into asm/module.h. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.h | 44 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/module-plts.c | 38 ++--------------------------------- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 19bd97671bb8..11d4aaee82e1 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -45,4 +45,48 @@ extern u64 module_alloc_base; #define module_alloc_base ((u64)_etext - MODULES_VSIZE) #endif +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... */ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +static inline struct plt_entry get_plt_entry(u64 val) +{ + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + return (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; +} + +static inline bool plt_entries_equal(const struct plt_entry *a, + const struct plt_entry *b) +{ + return a->mov0 == b->mov0 && + a->mov1 == b->mov1 && + a->mov2 == b->mov2; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index d05dbe658409..ebff6c155cac 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -11,21 +11,6 @@ #include #include -struct plt_entry { - /* - * A program that conforms to the AArch64 Procedure Call Standard - * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or - * IP1 (x17) may be inserted at any branch instruction that is - * exposed to a relocation that supports long branches. Since that - * is exactly what we are dealing with here, we are free to use x16 - * as a scratch register in the PLT veneers. - */ - __le32 mov0; /* movn x16, #0x.... */ - __le32 mov1; /* movk x16, #0x...., lsl #16 */ - __le32 mov2; /* movk x16, #0x...., lsl #32 */ - __le32 br; /* br x16 */ -}; - static bool in_init(const struct module *mod, void *loc) { return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; @@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; - /* - * MOVK/MOVN/MOVZ opcode: - * +--------+------------+--------+-----------+-------------+---------+ - * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | - * +--------+------------+--------+-----------+-------------+---------+ - * - * Rd := 0x10 (x16) - * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) - * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) - * sf := 1 (64-bit variant) - */ - plt[i] = (struct plt_entry){ - cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), - cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), - cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), - cpu_to_le32(0xd61f0200) - }; + plt[i] = get_plt_entry(val); /* * Check if the entry we just created is a duplicate. Given that the * relocations are sorted, this will be the last entry we allocated. * (if one exists). */ - if (i > 0 && - plt[i].mov0 == plt[i - 1].mov0 && - plt[i].mov1 == plt[i - 1].mov1 && - plt[i].mov2 == plt[i - 1].mov2) + if (i > 0 && plt_entries_equal(plt + i, plt + i - 1)) return (u64)&plt[i - 1]; pltsec->plt_num_entries++; -- cgit v1.2.3 From be0f272bfc83797f70d44faca86954df62e2bbc0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Nov 2017 17:41:30 +0000 Subject: arm64: ftrace: emit ftrace-mod.o contents through code When building the arm64 kernel with both CONFIG_ARM64_MODULE_PLTS and CONFIG_DYNAMIC_FTRACE enabled, the ftrace-mod.o object file is built with the kernel and contains a trampoline that is linked into each module, so that modules can be loaded far away from the kernel and still reach the ftrace entry point in the core kernel with an ordinary relative branch, as is emitted by the compiler instrumentation code dynamic ftrace relies on. In order to be able to build out of tree modules, this object file needs to be included into the linux-headers or linux-devel packages, which is undesirable, as it makes arm64 a special case (although a precedent does exist for 32-bit PPC). Given that the trampoline essentially consists of a PLT entry, let's not bother with a source or object file for it, and simply patch it in whenever the trampoline is being populated, using the existing PLT support routines. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Makefile | 3 --- arch/arm64/include/asm/module.h | 2 +- arch/arm64/kernel/Makefile | 3 --- arch/arm64/kernel/ftrace-mod.S | 18 ------------------ arch/arm64/kernel/ftrace.c | 14 ++++++++------ arch/arm64/kernel/module-plts.c | 12 ++++++++++++ arch/arm64/kernel/module.lds | 1 + 7 files changed, 22 insertions(+), 31 deletions(-) delete mode 100644 arch/arm64/kernel/ftrace-mod.S diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b35788c909f1..b481b4a7c011 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -83,9 +83,6 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds -ifeq ($(CONFIG_DYNAMIC_FTRACE),y) -KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o -endif endif # Default value diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 11d4aaee82e1..4f766178fa6f 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -32,7 +32,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - void *ftrace_trampoline; + struct plt_entry *ftrace_trampoline; }; #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8265dd790895..067baace74a0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif - -# will be included by each individual module but not by the core kernel itself -extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S deleted file mode 100644 index 00c4025be4ff..000000000000 --- a/arch/arm64/kernel/ftrace-mod.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2017 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - - .section ".text.ftrace_trampoline", "ax" - .align 3 -0: .quad 0 -__ftrace_trampoline: - ldr x16, 0b - br x16 -ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5b..50986e388d2b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - unsigned long *trampoline; + struct plt_entry trampoline; struct module *mod; /* @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. */ - trampoline = (unsigned long *)mod->arch.ftrace_trampoline; - if (trampoline[0] != addr) { - if (trampoline[0] != 0) { + trampoline = get_plt_entry(addr); + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &trampoline)) { + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &(struct plt_entry){})) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } /* point the trampoline to our ftrace entry point */ module_disable_ro(mod); - trampoline[0] = addr; + *mod->arch.ftrace_trampoline = trampoline; module_enable_ro(mod, true); /* update trampoline before patching in the branch */ smp_wmb(); } - addr = (unsigned long)&trampoline[1]; + addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index ebff6c155cac..ea640f92fe5a 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -120,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; + Elf_Shdr *tramp = NULL; int i; /* @@ -131,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt = sechdrs + i; + else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(secstrings + sechdrs[i].sh_name, + ".text.ftrace_trampoline")) + tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } @@ -181,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt_num_entries = 0; mod->arch.init.plt_max_entries = init_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48..22e36a21c113 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,4 +1,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .init.plt (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } } -- cgit v1.2.3 From a349b30250634da20950eb91e2551dcd81f1805d Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Wed, 22 Nov 2017 21:43:59 +0900 Subject: arm64: pgd: Mark pgd_cache as __ro_after_init pgd_cache is setup once while init stage and never changed after that, so it is good candidate for __ro_after_init Signed-off-by: Jinbum Park Signed-off-by: Will Deacon --- arch/arm64/mm/pgd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 371c5f03a170..051e71ec3335 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,7 +26,7 @@ #include #include -static struct kmem_cache *pgd_cache; +static struct kmem_cache *pgd_cache __ro_after_init; pgd_t *pgd_alloc(struct mm_struct *mm) { -- cgit v1.2.3 From 9de52a755cfb6da5ee21a07e3a868bdc8fbfccb3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 30 Nov 2017 11:56:37 +0000 Subject: arm64: fpsimd: Fix failure to restore FPSIMD state after signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fpsimd_update_current_state() function is responsible for loading the FPSIMD state from the user signal frame into the current task during sigreturn. When implementing support for SVE, conditional code was added to this function in order to handle the case where SVE state need to be loaded for the task and merged with the FPSIMD data from the signal frame; however, the FPSIMD-only case was unintentionally dropped. As a result of this, sigreturn does not currently restore the FPSIMD state of the task, except in the case where the system supports SVE and the signal frame contains SVE state in addition to FPSIMD state. This patch fixes this bug by making the copy-in of the FPSIMD data from the signal frame to thread_struct unconditional. This remains a performance regression from v4.14, since the FPSIMD state is now copied into thread_struct and then loaded back, instead of _only_ being loaded into the CPU FPSIMD registers. However, it is essential to call task_fpsimd_load() here anyway in order to ensure that the SVE enable bit in CPACR_EL1 is set correctly before returning to userspace. This could use some refactoring, but since sigreturn is not a fast path I have kept this patch as a pure fix and left the refactoring for later. Cc: Catalin Marinas Cc: Ard Biesheuvel Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support") Reported-by: Alex Bennée Tested-by: Alex Bennée Reviewed-by: Alex Bennée Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 143b3e72c25e..5084e699447a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state) local_bh_disable(); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { - current->thread.fpsimd_state = *state; + current->thread.fpsimd_state = *state; + if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); - } + task_fpsimd_load(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { -- cgit v1.2.3 From f8ada189550984ee21f27be736042b74a7da1d68 Mon Sep 17 00:00:00 2001 From: Xu YiPing Date: Wed, 15 Nov 2017 15:39:26 +0800 Subject: arm64: perf: remove unsupported events for Cortex-A73 bus access read/write events are not supported in A73, based on the Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460). Fixes: 5561b6c5e981 "arm64: perf: add support for Cortex-A73" Acked-by: Julien Thierry Signed-off-by: Xu YiPing Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff..3affca3dd96a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] -- cgit v1.2.3 From 770ba06084f7aeadea120922c775d574f3128ba3 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 29 Nov 2017 17:03:03 +0300 Subject: arm64: cpu_ops: Add missing 'const' qualifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the kernel with an LTO-enabled GCC spits out the following "const" warning for the cpu_ops code: mm/percpu.c:2168:20: error: pcpu_fc_names causes a section type conflict with dt_supported_cpu_ops const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = { ^ arch/arm64/kernel/cpu_ops.c:34:37: note: ‘dt_supported_cpu_ops’ was declared here static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { Fix it by adding missed const qualifiers. Signed-off-by: Yury Norov Reviewed-by: Nick Desaulniers Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d16978213c5b..ea001241bdd4 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; -static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = { #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL &acpi_parking_protocol_ops, #endif @@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops; + const struct cpu_operations *const *ops; ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; -- cgit v1.2.3 From 3a33c7605750fb6a87613044d16b1455e482414d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Nov 2017 18:25:17 +0000 Subject: arm64: context: Fix comments and remove pointless smp_wmb() The comments in the ASID allocator incorrectly hint at an MP-style idiom using the asid_generation and the active_asids array. In fact, the synchronisation is achieved using a combination of an xchg operation and a spinlock, so update the comments and remove the pointless smp_wmb(). Cc: James Morse Signed-off-by: Will Deacon --- arch/arm64/mm/context.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 28a45a19aae7..6f4017046323 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu) set_reserved_asid_bits(); - /* - * Ensure the generation bump is observed before we xchg the - * active_asids. - */ - smp_wmb(); - for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* @@ -205,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) asid = atomic64_read(&mm->context.id); /* - * The memory ordering here is subtle. We rely on the control - * dependency between the generation read and the update of - * active_asids to ensure that we are synchronised with a - * parallel rollover (i.e. this pairs with the smp_wmb() in - * flush_context). + * The memory ordering here is subtle. + * If our ASID matches the current generation, then we update + * our active_asids entry with a relaxed xchg. Racing with a + * concurrent rollover means that either: + * + * - We get a zero back from the xchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated generation. + * + * - We get a valid ASID back from the xchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. */ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) -- cgit v1.2.3 From 23f1b8d938c861ee0bbb786162f7ce0685f722ec Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Mon, 20 Nov 2017 10:55:15 +0100 Subject: fw_cfg: fix driver remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On driver remove(), all objects created during probe() should be removed, but sysfs qemu_fw_cfg/rev file was left. Also reorder functions to match probe() error cleanup code. Cc: stable@vger.kernel.org Signed-off-by: Marc-André Lureau Signed-off-by: Michael S. Tsirkin --- drivers/firmware/qemu_fw_cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 5cfe39f7a45f..deb483064f53 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device *pdev) { pr_debug("fw_cfg: unloading.\n"); fw_cfg_sysfs_cache_cleanup(); + sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr); + fw_cfg_io_cleanup(); fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset); fw_cfg_kobj_cleanup(fw_cfg_sel_ko); - fw_cfg_io_cleanup(); return 0; } -- cgit v1.2.3 From e60ea67bb60459b95a50a156296041a13e0e380e Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Wed, 29 Nov 2017 09:23:01 +0800 Subject: virtio: release virtio index when fail to device_register index can be reused by other virtio device. Cc: stable@vger.kernel.org Signed-off-by: weiping zhang Reviewed-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 48230a5e12f2..bf7ff3934d7f 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -333,6 +333,8 @@ int register_virtio_device(struct virtio_device *dev) /* device_register() causes the bus infrastructure to look for a * matching driver. */ err = device_register(&dev->dev); + if (err) + ida_simple_remove(&virtio_index_ida, dev->index); out: if (err) virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); -- cgit v1.2.3 From d9e427f6ab8142d6868eb719e6a7851aafea56b6 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Fri, 1 Dec 2017 10:50:28 +0100 Subject: virtio_balloon: fix increment of vb->num_pfns in fill_balloon() commit c7cdff0e8647 ("virtio_balloon: fix deadlock on OOM") changed code to increment vb->num_pfns before call to set_page_pfns(), which used to happen only after. This patch fixes boot hang for me on ppc64le KVM guests. Fixes: c7cdff0e8647 ("virtio_balloon: fix deadlock on OOM") Cc: Michael S. Tsirkin Cc: Tetsuo Handa Cc: Michal Hocko Cc: Wei Wang Cc: stable@vger.kernel.org Signed-off-by: Jan Stancek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7960746f7597..a1fb52cb3f0a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -174,13 +174,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) while ((page = balloon_page_pop(&pages))) { balloon_page_enqueue(&vb->vb_dev_info, page); - vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) adjust_managed_page_count(page, -1); + vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; } num_allocated_pages = vb->num_pfns; -- cgit v1.2.3 From e4ec40ec4b260efcca15089de4285a0a3411259b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 1 Dec 2017 13:41:19 +0200 Subject: xhci: Don't show incorrect WARN message about events for empty rings xHC can generate two events for a short transfer if the short TRB and last TRB in the TD are not the same TRB. The driver will handle the TD after the first short event, and remove it from its internal list. Driver then incorrectly prints a warning for the second event: "WARN Event TRB for slot x ep y with no TDs queued" Fix this by not printing a warning if we get a event on a empty list if the previous event was a short event. Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index c239c688076c..6eb87c6e4d24 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2477,12 +2477,16 @@ static int handle_tx_event(struct xhci_hcd *xhci, */ if (list_empty(&ep_ring->td_list)) { /* - * A stopped endpoint may generate an extra completion - * event if the device was suspended. Don't print - * warnings. + * Don't print wanings if it's due to a stopped endpoint + * generating an extra completion event if the device + * was suspended. Or, a event for the last TRB of a + * short TD we already got a short event for. + * The short TD is already removed from the TD list. */ + if (!(trb_comp_code == COMP_STOPPED || - trb_comp_code == COMP_STOPPED_LENGTH_INVALID)) { + trb_comp_code == COMP_STOPPED_LENGTH_INVALID || + ep_ring->last_td_was_short)) { xhci_warn(xhci, "WARN Event TRB for slot %d ep %d with no TDs queued?\n", TRB_TO_SLOT_ID(le32_to_cpu(event->flags)), ep_index); -- cgit v1.2.3 From 80e457699a8dbdd70f2d26911e46f538645c55fc Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Fri, 1 Dec 2017 13:41:20 +0200 Subject: usb: xhci: fix panic in xhci_free_virt_devices_depth_first Check vdev->real_port 0 to avoid panic [ 9.261347] [] xhci_free_virt_devices_depth_first+0x58/0x108 [ 9.261352] [] xhci_mem_cleanup+0x1bc/0x570 [ 9.261355] [] xhci_stop+0x140/0x1c8 [ 9.261365] [] usb_remove_hcd+0xfc/0x1d0 [ 9.261369] [] xhci_plat_remove+0x6c/0xa8 [ 9.261377] [] platform_drv_remove+0x2c/0x70 [ 9.261384] [] __device_release_driver+0x80/0x108 [ 9.261387] [] device_release_driver+0x2c/0x40 [ 9.261392] [] bus_remove_device+0xe0/0x120 [ 9.261396] [] device_del+0x114/0x210 [ 9.261399] [] platform_device_del+0x30/0xa0 [ 9.261403] [] dwc3_otg_work+0x204/0x488 [ 9.261407] [] event_work+0x304/0x5b8 [ 9.261414] [] process_one_work+0x148/0x490 [ 9.261417] [] worker_thread+0x50/0x4a0 [ 9.261421] [] kthread+0xe8/0x100 [ 9.261427] [] ret_from_fork+0x10/0x50 The problem can occur if xhci_plat_remove() is called shortly after xhci_plat_probe(). While xhci_free_virt_devices_depth_first been called before the device has been setup and get real_port initialized. The problem occurred on Hikey960 and was reproduced by Guenter Roeck on Kevin with chromeos-4.4. Fixes: ee8665e28e8d ("xhci: free xhci virtual devices with leaf nodes first") Cc: Guenter Roeck Cc: # v4.10+ Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Fan Ning Signed-off-by: Li Rui Signed-off-by: yangdi Signed-off-by: Yu Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index e1fba4688509..15f7d422885f 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -934,6 +934,12 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) if (!vdev) return; + if (vdev->real_port == 0 || + vdev->real_port > HCS_MAX_PORTS(xhci->hcs_params1)) { + xhci_dbg(xhci, "Bad vdev->real_port.\n"); + goto out; + } + tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts); list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { /* is this a hub device that added a tt_info to the tts list */ @@ -947,6 +953,7 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) } } } +out: /* we are now at a leaf device */ xhci_debugfs_remove_slot(xhci, slot_id); xhci_free_virt_device(xhci, slot_id); -- cgit v1.2.3 From d30fc5126efb0c33b7adf5966d3051db2c3d7721 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:18:34 +0800 Subject: sctp: only update outstanding_bytes for transmitted queue when doing prsctp_prune Now outstanding_bytes is only increased when appending chunks into one packet and sending it at 1st time, while decreased when it is about to move into retransmit queue. It means outstanding_bytes value is already decreased for all chunks in retransmit queue. However sctp_prsctp_prune_sent is a common function to check the chunks in both transmitted and retransmit queue, it decrease outstanding_bytes when moving a chunk into abandoned queue from either of them. It could cause outstanding_bytes underflow, as it also decreases it's value for the chunks in retransmit queue. This patch fixes it by only updating outstanding_bytes for transmitted queue when pruning queues for prsctp prio policy, the same fix is also needed in sctp_check_transmitted. Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4db012aa25f7..7029f8b99063 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -377,7 +377,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; - if (!chk->tsn_gap_acked) { + if (queue != &asoc->outqueue.retransmit && + !chk->tsn_gap_acked) { if (chk->transport) chk->transport->flight_size -= sctp_data_size(chk); @@ -1434,7 +1435,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* If this chunk has not been acked, stop * considering it as 'outstanding'. */ - if (!tchunk->tsn_gap_acked) { + if (transmitted_queue != &q->retransmit && + !tchunk->tsn_gap_acked) { if (tchunk->transport) tchunk->transport->flight_size -= sctp_data_size(tchunk); -- cgit v1.2.3 From e5f612969c6f965e3bd1158598e0a3b1c4f389b9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:18:35 +0800 Subject: sctp: abandon the whole msg if one part of a fragmented message is abandoned As rfc3758#section-3.1 demands: A3) When a TSN is "abandoned", if it is part of a fragmented message, all other TSN's within that fragmented message MUST be abandoned at the same time. Besides, if it couldn't handle this, the rest frags would never get assembled in peer side. This patch supports it by adding abandoned flag in sctp_datamsg, when one chunk is being abandoned, set chunk->msg->abandoned as well. Next time when checking for abandoned, go checking chunk->msg->abandoned first. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 ++- net/sctp/chunk.c | 7 +++++++ net/sctp/outqueue.c | 12 ++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 16f949eef52f..2f8f93da5dc2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -503,7 +503,8 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - can_delay; /* should this message be Nagle delayed */ + can_delay:1, /* should this message be Nagle delayed */ + abandoned:1; /* should this message be abandoned */ }; struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7b261afc47b9..9213805b558d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->send_failed = 0; msg->send_error = 0; msg->can_delay = 1; + msg->abandoned = 0; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); } @@ -304,6 +305,9 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (!chunk->asoc->peer.prsctp_capable) return 0; + if (chunk->msg->abandoned) + return 1; + if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = @@ -316,6 +320,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; } + chunk->msg->abandoned = 1; return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { @@ -324,10 +329,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + chunk->msg->abandoned = 1; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && time_after(jiffies, chunk->msg->expires_at)) { + chunk->msg->abandoned = 1; return 1; } /* PRIO policy is processed by sendmsg, not here */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 7029f8b99063..4ab164b5aad0 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, transmitted_list) { struct sctp_stream_out *streamout; - if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) + if (!chk->msg->abandoned && + (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; + chk->msg->abandoned = 1; list_del_init(&chk->transmitted_list); sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); @@ -404,10 +406,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->sched->unsched_all(&asoc->stream); list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { - if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) + if (!chk->msg->abandoned && + (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; + chk->msg->abandoned = 1; sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; -- cgit v1.2.3 From 779edd7348878a7376c0e3d0f96485c30b5f1b7d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:18:36 +0800 Subject: sctp: do not abandon the other frags in unsent outq if one msg has outstanding frags Now for the abandoned chunks in unsent outq, it would just free the chunks. Because no tsn is assigned to them yet, there's no need to send fwd tsn to peer, unlike for the abandoned chunks in sent outq. The problem is when parts of the msg have been sent and the other frags are still in unsent outq, if they are abandoned/dropped, the peer would never get this msg reassembled. So these frags in unsent outq can't be dropped if this msg already has outstanding frags. This patch does the check in sctp_chunk_abandoned and sctp_prsctp_prune_unsent. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/chunk.c | 4 ++++ net/sctp/outqueue.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 9213805b558d..7f8baa48e7c2 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -308,6 +308,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (chunk->msg->abandoned) return 1; + if (!chunk->has_tsn && + !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG)) + return 0; + if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4ab164b5aad0..7d67feeeffc1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -407,7 +407,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!chk->msg->abandoned && - (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) || + !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; -- cgit v1.2.3 From cfac7f836a715b91f08c851df915d401a4d52783 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Dec 2017 10:06:56 -0800 Subject: tcp/dccp: block bh before arming time_wait timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maciej Żenczykowski reported some panics in tcp_twsk_destructor() that might be caused by the following bug. timewait timer is pinned to the cpu, because we want to transition timwewait refcount from 0 to 4 in one go, once everything has been initialized. At the time commit ed2e92394589 ("tcp/dccp: fix timewait races in timer handling") was merged, TCP was always running from BH habdler. After commit 5413d1babe8f ("net: do not block BH while processing socket backlog") we definitely can run tcp_time_wait() from process context. We need to block BH in the critical section so that the pinned timer has still its purpose. This bug is more likely to happen under stress and when very small RTO are used in datacenter flows. Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/dccp/minisocks.c | 6 ++++++ net/ipv4/tcp_minisocks.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index abd07a443219..178bb9833311 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e36eff0403f4..b079b619b60c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (state == TCP_TIME_WAIT) timeo = TCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than -- cgit v1.2.3 From c7799c067c2ae33e348508c8afec354f3257ff25 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 29 Nov 2017 12:48:42 +0200 Subject: tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv() Remove the second tipc_rcv() call in tipc_udp_recv(). We have just checked that the bearer is not up, and calling tipc_rcv() with a bearer that is not up leads to a TIPC div-by-zero crash in tipc_node_calculate_timer(). The crash is rare in practice, but can happen like this: We're enabling a bearer, but it's not yet up and fully initialized. At the same time we receive a discovery packet, and in tipc_udp_recv() we end up calling tipc_rcv() with the not-yet-initialized bearer, causing later the div-by-zero crash in tipc_node_calculate_timer(). Jon Maloy explains the impact of removing the second tipc_rcv() call: "link setup in the worst case will be delayed until the next arriving discovery messages, 1 sec later, and this is an acceptable delay." As the tipc_rcv() call is removed, just leave the function via the rcu_out label, so that we will kfree_skb(). [ 12.590450] Own node address <1.1.1>, network identity 1 [ 12.668088] divide error: 0000 [#1] SMP [ 12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1 [ 12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 12.682095] task: ffff8c2a761edb80 task.stack: ffffa41cc0cac000 [ 12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] [ 12.686486] RSP: 0018:ffff8c2a7fc838a0 EFLAGS: 00010246 [ 12.688451] RAX: 0000000000000000 RBX: ffff8c2a5b382600 RCX: 0000000000000000 [ 12.691197] RDX: 0000000000000000 RSI: ffff8c2a5b382600 RDI: ffff8c2a5b382600 [ 12.693945] RBP: ffff8c2a7fc838b0 R08: 0000000000000001 R09: 0000000000000001 [ 12.696632] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2a5d8949d8 [ 12.699491] R13: ffffffff95ede400 R14: 0000000000000000 R15: ffff8c2a5d894800 [ 12.702338] FS: 0000000000000000(0000) GS:ffff8c2a7fc80000(0000) knlGS:0000000000000000 [ 12.705099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 12.706776] CR2: 0000000001bb9440 CR3: 00000000bd009001 CR4: 00000000003606e0 [ 12.708847] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 12.711016] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 12.712627] Call Trace: [ 12.713390] [ 12.714011] tipc_node_check_dest+0x2e8/0x350 [tipc] [ 12.715286] tipc_disc_rcv+0x14d/0x1d0 [tipc] [ 12.716370] tipc_rcv+0x8b0/0xd40 [tipc] [ 12.717396] ? minmax_running_min+0x2f/0x60 [ 12.718248] ? dst_alloc+0x4c/0xa0 [ 12.718964] ? tcp_ack+0xaf1/0x10b0 [ 12.719658] ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc] [ 12.720634] tipc_udp_recv+0x71/0x1d0 [tipc] [ 12.721459] ? dst_alloc+0x4c/0xa0 [ 12.722130] udp_queue_rcv_skb+0x264/0x490 [ 12.722924] __udp4_lib_rcv+0x21e/0x990 [ 12.723670] ? ip_route_input_rcu+0x2dd/0xbf0 [ 12.724442] ? tcp_v4_rcv+0x958/0xa40 [ 12.725039] udp_rcv+0x1a/0x20 [ 12.725587] ip_local_deliver_finish+0x97/0x1d0 [ 12.726323] ip_local_deliver+0xaf/0xc0 [ 12.726959] ? ip_route_input_noref+0x19/0x20 [ 12.727689] ip_rcv_finish+0xdd/0x3b0 [ 12.728307] ip_rcv+0x2ac/0x360 [ 12.728839] __netif_receive_skb_core+0x6fb/0xa90 [ 12.729580] ? udp4_gro_receive+0x1a7/0x2c0 [ 12.730274] __netif_receive_skb+0x1d/0x60 [ 12.730953] ? __netif_receive_skb+0x1d/0x60 [ 12.731637] netif_receive_skb_internal+0x37/0xd0 [ 12.732371] napi_gro_receive+0xc7/0xf0 [ 12.732920] receive_buf+0x3c3/0xd40 [ 12.733441] virtnet_poll+0xb1/0x250 [ 12.733944] net_rx_action+0x23e/0x370 [ 12.734476] __do_softirq+0xc5/0x2f8 [ 12.734922] irq_exit+0xfa/0x100 [ 12.735315] do_IRQ+0x4f/0xd0 [ 12.735680] common_interrupt+0xa2/0xa2 [ 12.736126] [ 12.736416] RIP: 0010:native_safe_halt+0x6/0x10 [ 12.736925] RSP: 0018:ffffa41cc0cafe90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff4d [ 12.737756] RAX: 0000000000000000 RBX: ffff8c2a761edb80 RCX: 0000000000000000 [ 12.738504] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 12.739258] RBP: ffffa41cc0cafe90 R08: 0000014b5b9795e5 R09: ffffa41cc12c7e88 [ 12.740118] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002 [ 12.740964] R13: ffff8c2a761edb80 R14: 0000000000000000 R15: 0000000000000000 [ 12.741831] default_idle+0x2a/0x100 [ 12.742323] arch_cpu_idle+0xf/0x20 [ 12.742796] default_idle_call+0x28/0x40 [ 12.743312] do_idle+0x179/0x1f0 [ 12.743761] cpu_startup_entry+0x1d/0x20 [ 12.744291] start_secondary+0x112/0x120 [ 12.744816] secondary_startup_64+0xa5/0xa5 [ 12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00 00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48 89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f [ 12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: ffff8c2a7fc838a0 [ 12.748555] ---[ end trace 1399ab83390650fd ]--- [ 12.749296] Kernel panic - not syncing: Fatal exception in interrupt [ 12.750123] Kernel Offset: 0x13200000 from 0xffffffff82000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 12.751215] Rebooting in 60 seconds.. Fixes: c9b64d492b1f ("tipc: add replicast peer discovery") Signed-off-by: Tommi Rantala Cc: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..3deabcab4882 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } - tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); - return 0; - rcu_out: rcu_read_unlock(); out: -- cgit v1.2.3 From a0da456bbf95d2a9294799bb05c61bfb24736bb7 Mon Sep 17 00:00:00 2001 From: Max Uvarov Date: Thu, 30 Nov 2017 13:08:29 +0300 Subject: net: phy-micrel: check return code in flp center function Fix obvious typo that first return value is set but not checked. Signed-off-by: Max Uvarov Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fdb43dd9b5cd..ab4614113403 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -496,16 +496,18 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev, return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); } +/* Center KSZ9031RNX FLP timing at 16ms. */ static int ksz9031_center_flp_timing(struct phy_device *phydev) { int result; - /* Center KSZ9031RNX FLP timing at 16ms. */ result = ksz9031_extended_write(phydev, OP_DATA, 0, MII_KSZ9031RN_FLP_BURST_TX_HI, 0x0006); + if (result) + return result; + result = ksz9031_extended_write(phydev, OP_DATA, 0, MII_KSZ9031RN_FLP_BURST_TX_LO, 0x1A80); - if (result) return result; -- cgit v1.2.3 From acf1c02f023926b8b04672a9e81b1711ae681619 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:11 +0000 Subject: sfp: fix RX_LOS signal handling The options word is a be16 quantity, so we need to test the flags having converted the endian-ness. Convert the flag bits to be16, which can be optimised by the compiler, rather than converting a variable at runtime. Reported-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index e381811e5f11..3355141688a6 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -355,7 +355,7 @@ static void sfp_sm_link_check_los(struct sfp *sfp) * SFP_OPTIONS_LOS_NORMAL are set? For now, we assume * the same as SFP_OPTIONS_LOS_NORMAL set. */ - if (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED) + if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED)) los ^= SFP_F_LOS; if (los) @@ -582,7 +582,8 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) if (event == SFP_E_TX_FAULT) sfp_sm_fault(sfp, true); else if (event == - (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ? + (sfp->id.ext.options & + cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? SFP_E_LOS_HIGH : SFP_E_LOS_LOW)) sfp_sm_link_up(sfp); break; @@ -592,7 +593,8 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) sfp_sm_link_down(sfp); sfp_sm_fault(sfp, true); } else if (event == - (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ? + (sfp->id.ext.options & + cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) { sfp_sm_link_down(sfp); sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); -- cgit v1.2.3 From 710dfbb01a8ac0fc8d0cc191131bd84dc3796497 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:16 +0000 Subject: sfp: improve RX_LOS handling There are two bits in the option word for the RX_LOS signal. One reports that the RX_LOS signal is active high, the other reports that it is active low. When both or neither are set, the result is not well defined in the specification. Rather than assuming that neither set means normal RX_LOS, take this as meaning no RX_LOS signal available, thereby ignoring the signal. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 3355141688a6..c1aab6a81ce9 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -351,12 +351,13 @@ static void sfp_sm_link_check_los(struct sfp *sfp) { unsigned int los = sfp->state & SFP_F_LOS; - /* FIXME: what if neither SFP_OPTIONS_LOS_INVERTED nor - * SFP_OPTIONS_LOS_NORMAL are set? For now, we assume - * the same as SFP_OPTIONS_LOS_NORMAL set. + /* If neither SFP_OPTIONS_LOS_INVERTED nor SFP_OPTIONS_LOS_NORMAL + * are set, we assume that no LOS signal is available. */ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED)) los ^= SFP_F_LOS; + else if (!(sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL))) + los = 0; if (los) sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); @@ -364,6 +365,22 @@ static void sfp_sm_link_check_los(struct sfp *sfp) sfp_sm_link_up(sfp); } +static bool sfp_los_event_active(struct sfp *sfp, unsigned int event) +{ + return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) && + event == SFP_E_LOS_LOW) || + (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) && + event == SFP_E_LOS_HIGH); +} + +static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event) +{ + return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) && + event == SFP_E_LOS_HIGH) || + (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) && + event == SFP_E_LOS_LOW); +} + static void sfp_sm_fault(struct sfp *sfp, bool warn) { if (sfp->sm_retries && !--sfp->sm_retries) { @@ -581,10 +598,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) case SFP_S_WAIT_LOS: if (event == SFP_E_TX_FAULT) sfp_sm_fault(sfp, true); - else if (event == - (sfp->id.ext.options & - cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? - SFP_E_LOS_HIGH : SFP_E_LOS_LOW)) + else if (sfp_los_event_inactive(sfp, event)) sfp_sm_link_up(sfp); break; @@ -592,10 +606,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) if (event == SFP_E_TX_FAULT) { sfp_sm_link_down(sfp); sfp_sm_fault(sfp, true); - } else if (event == - (sfp->id.ext.options & - cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? - SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) { + } else if (sfp_los_event_active(sfp, event)) { sfp_sm_link_down(sfp); sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); } -- cgit v1.2.3 From ec7681bde6d0e53ea5ef564477c8656fc318023a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:21 +0000 Subject: sfp: warn about modules requiring address change sequence We do not support SFP modules which require the address change sequence as detailed by SFF 8472 revision 1.22 section 8.9. Warn when these modules are inserted, and treat them as SFF8079 modules for ethtool. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c1aab6a81ce9..9dfc1c4c954f 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -487,6 +487,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp) return -EINVAL; } + /* If the module requires address swap mode, warn about it */ + if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) + dev_warn(sfp->dev, + "module address swap to access page 0xA2 is not supported.\n"); + return sfp_module_insert(sfp->sfp_bus, &sfp->id); } @@ -652,7 +657,8 @@ static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo) { /* locking... and check module is present */ - if (sfp->id.ext.sff8472_compliance) { + if (sfp->id.ext.sff8472_compliance && + !(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)) { modinfo->type = ETH_MODULE_SFF_8472; modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; } else { -- cgit v1.2.3 From 2012b7d6b2868c532f22d9172c8b24611637eb48 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:26 +0000 Subject: phylink: ensure we take the link down when phylink_stop() is called Ensure that we tell the MAC to take the link down when phylink_stop() is called, and that this completes prior to phylink_stop() returns. Reported-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index e3bbc70372d3..5dc9668dde34 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -773,6 +773,7 @@ void phylink_stop(struct phylink *pl) sfp_upstream_stop(pl->sfp_bus); set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); + queue_work(system_power_efficient_wq, &pl->resolve); flush_work(&pl->resolve); } EXPORT_SYMBOL_GPL(phylink_stop); -- cgit v1.2.3 From 4db2b604c05afc3d2678fe01d3136c015df313ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Nov 2017 11:47:28 +0100 Subject: move libgcc.h to include/linux Introducing a new include/lib directory just for this file totally messes up tab completion for include/linux, which is highly annoying. Move it to include/linux where we have headers for all kinds of other lib/ code as well. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- include/lib/libgcc.h | 43 ------------------------------------------- include/linux/libgcc.h | 43 +++++++++++++++++++++++++++++++++++++++++++ lib/ashldi3.c | 2 +- lib/ashrdi3.c | 2 +- lib/cmpdi2.c | 2 +- lib/lshrdi3.c | 2 +- lib/muldi3.c | 2 +- lib/ucmpdi2.c | 2 +- 8 files changed, 49 insertions(+), 49 deletions(-) delete mode 100644 include/lib/libgcc.h create mode 100644 include/linux/libgcc.h diff --git a/include/lib/libgcc.h b/include/lib/libgcc.h deleted file mode 100644 index 32e1e0f4b2d0..000000000000 --- a/include/lib/libgcc.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * include/lib/libgcc.h - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see the file COPYING, or write - * to the Free Software Foundation, Inc. - */ - -#ifndef __LIB_LIBGCC_H -#define __LIB_LIBGCC_H - -#include - -typedef int word_type __attribute__ ((mode (__word__))); - -#ifdef __BIG_ENDIAN -struct DWstruct { - int high, low; -}; -#elif defined(__LITTLE_ENDIAN) -struct DWstruct { - int low, high; -}; -#else -#error I feel sick. -#endif - -typedef union { - struct DWstruct s; - long long ll; -} DWunion; - -#endif /* __ASM_LIBGCC_H */ diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h new file mode 100644 index 000000000000..32e1e0f4b2d0 --- /dev/null +++ b/include/linux/libgcc.h @@ -0,0 +1,43 @@ +/* + * include/lib/libgcc.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc. + */ + +#ifndef __LIB_LIBGCC_H +#define __LIB_LIBGCC_H + +#include + +typedef int word_type __attribute__ ((mode (__word__))); + +#ifdef __BIG_ENDIAN +struct DWstruct { + int high, low; +}; +#elif defined(__LITTLE_ENDIAN) +struct DWstruct { + int low, high; +}; +#else +#error I feel sick. +#endif + +typedef union { + struct DWstruct s; + long long ll; +} DWunion; + +#endif /* __ASM_LIBGCC_H */ diff --git a/lib/ashldi3.c b/lib/ashldi3.c index 1b6087db95a5..3ffc46e3bb6c 100644 --- a/lib/ashldi3.c +++ b/lib/ashldi3.c @@ -16,7 +16,7 @@ #include -#include +#include long long notrace __ashldi3(long long u, word_type b) { diff --git a/lib/ashrdi3.c b/lib/ashrdi3.c index 2e67c97ac65a..ea054550f0e8 100644 --- a/lib/ashrdi3.c +++ b/lib/ashrdi3.c @@ -16,7 +16,7 @@ #include -#include +#include long long notrace __ashrdi3(long long u, word_type b) { diff --git a/lib/cmpdi2.c b/lib/cmpdi2.c index 6d7ebf6c2b86..2250da7e503e 100644 --- a/lib/cmpdi2.c +++ b/lib/cmpdi2.c @@ -16,7 +16,7 @@ #include -#include +#include word_type notrace __cmpdi2(long long a, long long b) { diff --git a/lib/lshrdi3.c b/lib/lshrdi3.c index 8e845f4bb65f..99cfa5721f2d 100644 --- a/lib/lshrdi3.c +++ b/lib/lshrdi3.c @@ -17,7 +17,7 @@ */ #include -#include +#include long long notrace __lshrdi3(long long u, word_type b) { diff --git a/lib/muldi3.c b/lib/muldi3.c index 88938543e10a..54c8b3123376 100644 --- a/lib/muldi3.c +++ b/lib/muldi3.c @@ -15,7 +15,7 @@ */ #include -#include +#include #define W_TYPE_SIZE 32 diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c index 49a53505c8e3..25ca2d4c1e19 100644 --- a/lib/ucmpdi2.c +++ b/lib/ucmpdi2.c @@ -15,7 +15,7 @@ */ #include -#include +#include word_type __ucmpdi2(unsigned long long a, unsigned long long b) { -- cgit v1.2.3 From da894ff100be9044c490f47f61541481b4f42b1f Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:15:32 -0800 Subject: RISC-V: __io_writes should respect the length argument Whoops -- I must have just been being an idiot again. Thanks to Segher for finding the bug :). CC: Segher Boessenkool Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c1f32cfcc79b..507ee6a16e67 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -250,7 +250,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) const ctype *buf = buffer; \ \ do { \ - __raw_writeq(*buf++, addr); \ + __raw_write ## len(*buf++, addr); \ } while (--count); \ } \ afence; \ -- cgit v1.2.3 From 81b039ec36a41a5451e1e36f05bb055eceab1dc8 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Mon, 13 Nov 2017 00:01:20 +0800 Subject: iio: adc: cpcap: fix incorrect validation Function platform_get_irq_byname() returns a negative error code on failure, and a zero or positive number on success. However, in function cpcap_adc_probe(), positive IRQ numbers are also taken as error cases. Use "if (ddata->irq < 0)" instead of "if (!ddata->irq)" to validate the return value of platform_get_irq_byname(). Signed-off-by: Pan Bian Fixes: 25ec249632d50 ("iio: adc: cpcap: Add minimal support for CPCAP PMIC ADC") Reviewed-by: Sebastian Reichel Acked-by: Tony Lindgren Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/cpcap-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/cpcap-adc.c b/drivers/iio/adc/cpcap-adc.c index 3576ec73ec23..9ad60421d360 100644 --- a/drivers/iio/adc/cpcap-adc.c +++ b/drivers/iio/adc/cpcap-adc.c @@ -1011,7 +1011,7 @@ static int cpcap_adc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, indio_dev); ddata->irq = platform_get_irq_byname(pdev, "adcdone"); - if (!ddata->irq) + if (ddata->irq < 0) return -ENODEV; error = devm_request_threaded_irq(&pdev->dev, ddata->irq, NULL, -- cgit v1.2.3 From e53111ad5deaef98d8c18a9933ef1f5624c5979d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 3 Nov 2017 15:03:36 +0200 Subject: iio: proximity: sx9500: Assign interrupt from GpioIo() The commit 0f0796509c07 ("iio: remove gpio interrupt probing from drivers that use a single interrupt") removed custom IRQ assignment for the drivers which are enumerated via ACPI or OF. Unfortunately, some ACPI tables have IRQ line defined as GpioIo() resource and thus automatic IRQ allocation will fail. Partially revert the commit 0f0796509c07 to restore original behaviour. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9500.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c index 53c5d653e780..df23dbcc030a 100644 --- a/drivers/iio/proximity/sx9500.c +++ b/drivers/iio/proximity/sx9500.c @@ -869,6 +869,7 @@ static int sx9500_init_device(struct iio_dev *indio_dev) static void sx9500_gpio_probe(struct i2c_client *client, struct sx9500_data *data) { + struct gpio_desc *gpiod_int; struct device *dev; if (!client) @@ -876,6 +877,14 @@ static void sx9500_gpio_probe(struct i2c_client *client, dev = &client->dev; + if (client->irq <= 0) { + gpiod_int = devm_gpiod_get(dev, SX9500_GPIO_INT, GPIOD_IN); + if (IS_ERR(gpiod_int)) + dev_err(dev, "gpio get irq failed\n"); + else + client->irq = gpiod_to_irq(gpiod_int); + } + data->gpiod_rst = devm_gpiod_get(dev, SX9500_GPIO_RESET, GPIOD_OUT_HIGH); if (IS_ERR(data->gpiod_rst)) { dev_warn(dev, "gpio get reset pin failed\n"); -- cgit v1.2.3 From 7a6b0420d2fe4ce59437bd318826fe468f0d71ae Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Tue, 31 Oct 2017 21:01:43 +0100 Subject: iio: adc: meson-saradc: fix the bit_idx of the adc_en clock Meson8 and Meson8b SoCs use the the SAR ADC gate clock provided by the MESON_SAR_ADC_REG3 register within the SAR ADC register area. According to the datasheet (and the existing MESON_SAR_ADC_REG3_CLK_EN definition) the gate is on bit 30. The fls() function returns the last set bit, which is "bit index + 1" (fls(MESON_SAR_ADC_REG3_CLK_EN) returns 31). Fix this by switching to __ffs() which returns the first set bit, which is bit 30 in our case. This off by one error results in the ADC not being usable on devices where the bootloader did not enable the clock. Fixes: 3adbf3427330 ("iio: adc: add a driver for the SAR ADC found in Amlogic Meson SoCs") Signed-off-by: Martin Blumenstingl Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 9c6932ffc0af..1d25c78b74d2 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -600,7 +600,7 @@ static int meson_sar_adc_clk_init(struct iio_dev *indio_dev, init.num_parents = 1; priv->clk_gate.reg = base + MESON_SAR_ADC_REG3; - priv->clk_gate.bit_idx = fls(MESON_SAR_ADC_REG3_CLK_EN); + priv->clk_gate.bit_idx = __ffs(MESON_SAR_ADC_REG3_CLK_EN); priv->clk_gate.hw.init = &init; priv->adc_clk = devm_clk_register(&indio_dev->dev, &priv->clk_gate.hw); -- cgit v1.2.3 From d85eed9f576369bc90322659de96b7dbea1f9a57 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Tue, 31 Oct 2017 21:01:44 +0100 Subject: iio: adc: meson-saradc: initialize the bandgap correctly on older SoCs Meson8 and Meson8b do not have the MESON_SAR_ADC_REG11 register. The bandgap setting for these SoCs is configured in the MESON_SAR_ADC_DELTA_10 register instead. Make the driver aware of this difference and use the correct bandgap register depending on the SoC. This has worked fine on Meson8 and Meson8b because the bootloader is already initializing the bandgap setting. Fixes: 6c76ed31cd05 ("iio: adc: meson-saradc: add Meson8b SoC compatibility") Signed-off-by: Martin Blumenstingl Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 1d25c78b74d2..a220d07d890f 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -221,6 +221,7 @@ enum meson_sar_adc_chan7_mux_sel { struct meson_sar_adc_data { bool has_bl30_integration; + u32 bandgap_reg; unsigned int resolution; const char *name; }; @@ -685,6 +686,20 @@ static int meson_sar_adc_init(struct iio_dev *indio_dev) return 0; } +static void meson_sar_adc_set_bandgap(struct iio_dev *indio_dev, bool on_off) +{ + struct meson_sar_adc_priv *priv = iio_priv(indio_dev); + u32 enable_mask; + + if (priv->data->bandgap_reg == MESON_SAR_ADC_REG11) + enable_mask = MESON_SAR_ADC_REG11_BANDGAP_EN; + else + enable_mask = MESON_SAR_ADC_DELTA_10_TS_VBG_EN; + + regmap_update_bits(priv->regmap, priv->data->bandgap_reg, enable_mask, + on_off ? enable_mask : 0); +} + static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev) { struct meson_sar_adc_priv *priv = iio_priv(indio_dev); @@ -717,9 +732,9 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev) regval = FIELD_PREP(MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, 1); regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG0, MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, regval); - regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG11, - MESON_SAR_ADC_REG11_BANDGAP_EN, - MESON_SAR_ADC_REG11_BANDGAP_EN); + + meson_sar_adc_set_bandgap(indio_dev, true); + regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3, MESON_SAR_ADC_REG3_ADC_EN, MESON_SAR_ADC_REG3_ADC_EN); @@ -739,8 +754,7 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev) err_adc_clk: regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3, MESON_SAR_ADC_REG3_ADC_EN, 0); - regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG11, - MESON_SAR_ADC_REG11_BANDGAP_EN, 0); + meson_sar_adc_set_bandgap(indio_dev, false); clk_disable_unprepare(priv->sana_clk); err_sana_clk: clk_disable_unprepare(priv->core_clk); @@ -765,8 +779,8 @@ static int meson_sar_adc_hw_disable(struct iio_dev *indio_dev) regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3, MESON_SAR_ADC_REG3_ADC_EN, 0); - regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG11, - MESON_SAR_ADC_REG11_BANDGAP_EN, 0); + + meson_sar_adc_set_bandgap(indio_dev, false); clk_disable_unprepare(priv->sana_clk); clk_disable_unprepare(priv->core_clk); @@ -844,30 +858,35 @@ static const struct iio_info meson_sar_adc_iio_info = { static const struct meson_sar_adc_data meson_sar_adc_meson8_data = { .has_bl30_integration = false, + .bandgap_reg = MESON_SAR_ADC_DELTA_10, .resolution = 10, .name = "meson-meson8-saradc", }; static const struct meson_sar_adc_data meson_sar_adc_meson8b_data = { .has_bl30_integration = false, + .bandgap_reg = MESON_SAR_ADC_DELTA_10, .resolution = 10, .name = "meson-meson8b-saradc", }; static const struct meson_sar_adc_data meson_sar_adc_gxbb_data = { .has_bl30_integration = true, + .bandgap_reg = MESON_SAR_ADC_REG11, .resolution = 10, .name = "meson-gxbb-saradc", }; static const struct meson_sar_adc_data meson_sar_adc_gxl_data = { .has_bl30_integration = true, + .bandgap_reg = MESON_SAR_ADC_REG11, .resolution = 12, .name = "meson-gxl-saradc", }; static const struct meson_sar_adc_data meson_sar_adc_gxm_data = { .has_bl30_integration = true, + .bandgap_reg = MESON_SAR_ADC_REG11, .resolution = 12, .name = "meson-gxm-saradc", }; -- cgit v1.2.3 From 96748823c483c6eed8321f78bd128dd33f09c55c Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Tue, 31 Oct 2017 21:01:45 +0100 Subject: iio: adc: meson-saradc: Meson8 and Meson8b do not have REG11 and REG13 The Meson GXBB and newer SoCs have a few more registers than the older Meson8 and Meson8b SoCs. Use a separate regmap config to limit the older SoCs to the DELTA_10 register. Fixes: 6c76ed31cd05 ("iio: adc: meson-saradc: add Meson8b SoC compatibility") Signed-off-by: Martin Blumenstingl Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index a220d07d890f..36047147ce7c 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -224,6 +224,7 @@ struct meson_sar_adc_data { u32 bandgap_reg; unsigned int resolution; const char *name; + const struct regmap_config *regmap_config; }; struct meson_sar_adc_priv { @@ -243,13 +244,20 @@ struct meson_sar_adc_priv { int calibscale; }; -static const struct regmap_config meson_sar_adc_regmap_config = { +static const struct regmap_config meson_sar_adc_regmap_config_gxbb = { .reg_bits = 8, .val_bits = 32, .reg_stride = 4, .max_register = MESON_SAR_ADC_REG13, }; +static const struct regmap_config meson_sar_adc_regmap_config_meson8 = { + .reg_bits = 8, + .val_bits = 32, + .reg_stride = 4, + .max_register = MESON_SAR_ADC_DELTA_10, +}; + static unsigned int meson_sar_adc_get_fifo_count(struct iio_dev *indio_dev) { struct meson_sar_adc_priv *priv = iio_priv(indio_dev); @@ -859,6 +867,7 @@ static const struct iio_info meson_sar_adc_iio_info = { static const struct meson_sar_adc_data meson_sar_adc_meson8_data = { .has_bl30_integration = false, .bandgap_reg = MESON_SAR_ADC_DELTA_10, + .regmap_config = &meson_sar_adc_regmap_config_meson8, .resolution = 10, .name = "meson-meson8-saradc", }; @@ -866,6 +875,7 @@ static const struct meson_sar_adc_data meson_sar_adc_meson8_data = { static const struct meson_sar_adc_data meson_sar_adc_meson8b_data = { .has_bl30_integration = false, .bandgap_reg = MESON_SAR_ADC_DELTA_10, + .regmap_config = &meson_sar_adc_regmap_config_meson8, .resolution = 10, .name = "meson-meson8b-saradc", }; @@ -873,6 +883,7 @@ static const struct meson_sar_adc_data meson_sar_adc_meson8b_data = { static const struct meson_sar_adc_data meson_sar_adc_gxbb_data = { .has_bl30_integration = true, .bandgap_reg = MESON_SAR_ADC_REG11, + .regmap_config = &meson_sar_adc_regmap_config_gxbb, .resolution = 10, .name = "meson-gxbb-saradc", }; @@ -880,6 +891,7 @@ static const struct meson_sar_adc_data meson_sar_adc_gxbb_data = { static const struct meson_sar_adc_data meson_sar_adc_gxl_data = { .has_bl30_integration = true, .bandgap_reg = MESON_SAR_ADC_REG11, + .regmap_config = &meson_sar_adc_regmap_config_gxbb, .resolution = 12, .name = "meson-gxl-saradc", }; @@ -887,6 +899,7 @@ static const struct meson_sar_adc_data meson_sar_adc_gxl_data = { static const struct meson_sar_adc_data meson_sar_adc_gxm_data = { .has_bl30_integration = true, .bandgap_reg = MESON_SAR_ADC_REG11, + .regmap_config = &meson_sar_adc_regmap_config_gxbb, .resolution = 12, .name = "meson-gxm-saradc", }; @@ -964,7 +977,7 @@ static int meson_sar_adc_probe(struct platform_device *pdev) return ret; priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, - &meson_sar_adc_regmap_config); + priv->data->regmap_config); if (IS_ERR(priv->regmap)) return PTR_ERR(priv->regmap); -- cgit v1.2.3 From c175cb7cd953782bbf4e8bdf088ad61440d6dde5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 29 Oct 2017 17:06:01 -0700 Subject: iio: fix kernel-doc build errors Fix build errors in kernel-doc notation. Symbols that end in '_' have a special meaning, but adding a '*' makes them OK. ../drivers/iio/industrialio-core.c:635: ERROR: Unknown target name: "iio_val". ../drivers/iio/industrialio-core.c:642: ERROR: Unknown target name: "iio_val". Signed-off-by: Randy Dunlap Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 9c4cfd19b739..2f0998ebeed2 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -631,7 +631,7 @@ static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type, * iio_format_value() - Formats a IIO value into its string representation * @buf: The buffer to which the formatted value gets written * which is assumed to be big enough (i.e. PAGE_SIZE). - * @type: One of the IIO_VAL_... constants. This decides how the val + * @type: One of the IIO_VAL_* constants. This decides how the val * and val2 parameters are formatted. * @size: Number of IIO value entries contained in vals * @vals: Pointer to the values, exact meaning depends on the @@ -639,7 +639,7 @@ static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type, * * Return: 0 by default, a negative number on failure or the * total number of characters written for a type that belongs - * to the IIO_VAL_... constant. + * to the IIO_VAL_* constant. */ ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals) { -- cgit v1.2.3 From ad44a9f804c1591ba2a2ec0ac8d916a515d2790c Mon Sep 17 00:00:00 2001 From: Peter Meerwald-Stadler Date: Fri, 27 Oct 2017 21:45:31 +0200 Subject: iio: health: max30102: Temperature should be in milli Celsius As per ABI temperature should be in milli Celsius after scaling, not Celsius Note on stable cc. This driver is breaking the standard IIO ABI. (JC) Signed-off-by: Peter Meerwald-Stadler Acked-by: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/health/max30102.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/health/max30102.c b/drivers/iio/health/max30102.c index 203ffb9cad6a..147a8c14235f 100644 --- a/drivers/iio/health/max30102.c +++ b/drivers/iio/health/max30102.c @@ -371,7 +371,7 @@ static int max30102_read_raw(struct iio_dev *indio_dev, mutex_unlock(&indio_dev->mlock); break; case IIO_CHAN_INFO_SCALE: - *val = 1; /* 0.0625 */ + *val = 1000; /* 62.5 */ *val2 = 16; ret = IIO_VAL_FRACTIONAL; break; -- cgit v1.2.3 From 6d745ee8b5e81f3a33791e3c854fbbfd6f3e585e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Sep 2017 14:56:50 +0200 Subject: iio: stm32: fix adc/trigger link error The ADC driver can trigger on either the timer or the lptim trigger, but it only uses a Kconfig 'select' statement to ensure that the first of the two is present. When the lptim trigger is enabled as a loadable module, and the adc driver is built-in, we now get a link error: drivers/iio/adc/stm32-adc.o: In function `stm32_adc_get_trig_extsel': stm32-adc.c:(.text+0x4e0): undefined reference to `is_stm32_lptim_trigger' We could use a second 'select' statement and always have both trigger drivers enabled when the adc driver is, but it seems that the lptimer trigger was intentionally left optional, so it seems better to keep it that way. This adds a hack to use 'IS_REACHABLE()' rather than 'IS_ENABLED()', which avoids the link error, but instead leads to the lptimer trigger not being used in the broken configuration. I've added a runtime warning for this case to help users figure out what they did wrong if this should ever be done by accident. Fixes: f0b638a7f6db ("iio: adc: stm32: add support for lptimer triggers") Signed-off-by: Arnd Bergmann Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/timer/stm32-lptim-trigger.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h index 34d59bfdce2d..464458d20b16 100644 --- a/include/linux/iio/timer/stm32-lptim-trigger.h +++ b/include/linux/iio/timer/stm32-lptim-trigger.h @@ -16,11 +16,14 @@ #define LPTIM2_OUT "lptim2_out" #define LPTIM3_OUT "lptim3_out" -#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER) +#if IS_REACHABLE(CONFIG_IIO_STM32_LPTIMER_TRIGGER) bool is_stm32_lptim_trigger(struct iio_trigger *trig); #else static inline bool is_stm32_lptim_trigger(struct iio_trigger *trig) { +#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER) + pr_warn_once("stm32 lptim_trigger not linked in\n"); +#endif return false; } #endif -- cgit v1.2.3 From e814bccbafece52a24e152d2395b5d49eef55841 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Nov 2017 15:20:03 +0000 Subject: scripts/kernel-doc: Don't fail with status != 0 if error encountered with -none My bisect scripts starting running into build failures when trying to compile 4.15-rc1 with the builds failing with things like: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:2078: error: Cannot parse struct or union! The line in question is actually just a #define, but after some digging it turns out that my scripts pass W=1 and since commit 3a025e1d1c2ea ("Add optional check for bad kernel-doc comments") that results in kernel-doc running on each source file. The file in question has a badly formatted comment immediately before the #define: /** * struct brcmf_skbuff_cb reserves first two bytes in sk_buff::cb for * bus layer usage. */ which causes the regex in dump_struct to fail (lack of braces following struct declaration) and kernel-doc returns 1, which causes the build to fail. Fix the issue by always returning 0 from kernel-doc when invoked with -none. It successfully generates no documentation, and prints out any issues. Cc: Matthew Wilcox Cc: Jonathan Corbet Signed-off-by: Will Deacon Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index bd29a92b4b48..df0f045a9a89 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -3248,4 +3248,4 @@ if ($verbose && $warnings) { print STDERR "$warnings warnings\n"; } -exit($errors); +exit($output_mode eq "none" ? 0 : $errors); -- cgit v1.2.3 From e50806a9fd0132402a62cb3161d786335662072f Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Wed, 29 Nov 2017 00:16:56 +0900 Subject: dmaengine: doc : Fix warning "Title underline too short" while make xmldocs This patch fix following warning during 'make xmldocs' Documentation/driver-api/dmaengine/client.rst:188: WARNING: Title underline too short. Further APIs: ------------ Signed-off-by: Masanari Iida Signed-off-by: Jonathan Corbet --- Documentation/driver-api/dmaengine/client.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst index 6245c99af8c1..fbbb2831f29f 100644 --- a/Documentation/driver-api/dmaengine/client.rst +++ b/Documentation/driver-api/dmaengine/client.rst @@ -185,7 +185,7 @@ The details of these operations are: void dma_async_issue_pending(struct dma_chan *chan); Further APIs: ------------- +------------- 1. Terminate APIs -- cgit v1.2.3 From 0f83aaa3c07a46b41b2d5d8ee6b88617eb2c2f09 Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Tue, 28 Nov 2017 06:16:21 +0100 Subject: genericirq.rst: Remove :c:func:`...` in code blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In code blocks, :c:func:`...` annotations don't result in cross-references. Instead, they are rendered verbatim. Remove these broken annotations, and mark function calls with parentheses() again. Fixes: 76d40fae1351 ("genericirq.rst: add cross-reference links and use monospaced fonts") Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/core-api/genericirq.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst index 0054bd48be84..4da67b65cecf 100644 --- a/Documentation/core-api/genericirq.rst +++ b/Documentation/core-api/genericirq.rst @@ -225,9 +225,9 @@ interrupts. The following control flow is implemented (simplified excerpt):: - :c:func:`desc->irq_data.chip->irq_mask_ack`; + desc->irq_data.chip->irq_mask_ack(); handle_irq_event(desc->action); - :c:func:`desc->irq_data.chip->irq_unmask`; + desc->irq_data.chip->irq_unmask(); Default Fast EOI IRQ flow handler @@ -239,7 +239,7 @@ which only need an EOI at the end of the handler. The following control flow is implemented (simplified excerpt):: handle_irq_event(desc->action); - :c:func:`desc->irq_data.chip->irq_eoi`; + desc->irq_data.chip->irq_eoi(); Default Edge IRQ flow handler @@ -251,15 +251,15 @@ interrupts. The following control flow is implemented (simplified excerpt):: if (desc->status & running) { - :c:func:`desc->irq_data.chip->irq_mask_ack`; + desc->irq_data.chip->irq_mask_ack(); desc->status |= pending | masked; return; } - :c:func:`desc->irq_data.chip->irq_ack`; + desc->irq_data.chip->irq_ack(); desc->status |= running; do { if (desc->status & masked) - :c:func:`desc->irq_data.chip->irq_unmask`; + desc->irq_data.chip->irq_unmask(); desc->status &= ~pending; handle_irq_event(desc->action); } while (status & pending); @@ -293,10 +293,10 @@ simplified version without locking. The following control flow is implemented (simplified excerpt):: if (desc->irq_data.chip->irq_ack) - :c:func:`desc->irq_data.chip->irq_ack`; + desc->irq_data.chip->irq_ack(); handle_irq_event(desc->action); if (desc->irq_data.chip->irq_eoi) - :c:func:`desc->irq_data.chip->irq_eoi`; + desc->irq_data.chip->irq_eoi(); EOI Edge IRQ flow handler -- cgit v1.2.3 From a08415ea2a8b3f0173fd0612317d5cba5b57fedb Mon Sep 17 00:00:00 2001 From: John Pittman Date: Wed, 22 Nov 2017 12:56:22 -0500 Subject: scsi: documentation: Fix case of 'scsi_device' struct mention(s) In scsi_mid_low_api.txt a the scsi_device structure is mentioned several times, but the leading 's' is uppercase (Scsi_device) and should be lowercase (scsi_device). Fixed by this commit. Signed-off-by: John Pittman Signed-off-by: Jonathan Corbet --- Documentation/scsi/scsi_mid_low_api.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index 6338400eed73..2c31d9ee6776 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt @@ -319,12 +319,12 @@ struct Scsi_Host: instance. If the reference count reaches 0 then the given instance is freed -The Scsi_device structure has had reference counting infrastructure added. -This effectively spreads the ownership of struct Scsi_device instances +The scsi_device structure has had reference counting infrastructure added. +This effectively spreads the ownership of struct scsi_device instances across the various SCSI layers which use them. Previously such instances were exclusively owned by the mid level. See the access functions declared towards the end of include/scsi/scsi_device.h . If an LLD wants to keep -a copy of a pointer to a Scsi_device instance it should use scsi_device_get() +a copy of a pointer to a scsi_device instance it should use scsi_device_get() to bump its reference count. When it is finished with the pointer it can use scsi_device_put() to decrement its reference count (and potentially delete it). -- cgit v1.2.3 From 8a64e557f399090f5d1917b2f32a065da2b12be1 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 16 Nov 2017 23:18:32 +0300 Subject: gpio: pca953x: fix vendor prefix for PCA9654 Despite commit 55020c8056a8 ("of: Add vendor prefix for ON Semiconductor Corp.") was made long ago, the latter commit 9f49f6dd0473 ("gpio: pca953x: add onsemi,pca9654 id") made use of another, undocumented vendor prefix. Since such prefix doesn't seem to be used in any device trees, I think we can just fix the "compatible" string in the driver and the bindings and be done with that... Fixes: 9f49f6dd0473 ("gpio: pca953x: add onsemi,pca9654 id") Signed-off-by: Sergei Shtylyov Acked-by: Rob Herring Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/gpio/gpio-pca953x.txt | 2 +- drivers/gpio/gpio-pca953x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt index 7f57271df2bc..0d0158728f89 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt @@ -27,7 +27,7 @@ Required properties: ti,tca6424 ti,tca9539 ti,tca9554 - onsemi,pca9654 + onnn,pca9654 exar,xra1202 Optional properties: diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index babb7bd2ba59..a0a5f9730aa7 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -947,7 +947,7 @@ static const struct of_device_id pca953x_dt_ids[] = { { .compatible = "ti,tca6416", .data = OF_953X(16, PCA_INT), }, { .compatible = "ti,tca6424", .data = OF_953X(24, PCA_INT), }, - { .compatible = "onsemi,pca9654", .data = OF_953X( 8, PCA_INT), }, + { .compatible = "onnn,pca9654", .data = OF_953X( 8, PCA_INT), }, { .compatible = "exar,xra1202", .data = OF_953X( 8, 0), }, { } -- cgit v1.2.3 From a7f3f939dd7d8398acebecd1ceb2e9e7ffbe91d2 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Fri, 1 Dec 2017 03:13:02 -0500 Subject: bnxt_en: Need to unconditionally shut down RoCE in bnxt_shutdown The current 'bnxt_shutdown' implementation only invokes 'bnxt_ulp_shutdown' to shut down RoCE in the case when the system is in the path of power off (SYSTEM_POWER_OFF). While this may work in most cases, it does not work in the smart NIC case, when Linux 'reboot' command is initiated from the Linux that runs on the ARM cores of the NIC card. In this particular case, Linux 'reboot' results in a system 'L3' level reset where the entire ARM and associated subsystems are being reset, but at the same time, Nitro core is being kept in sane state (to allow external PCIe connected servers to continue to work). Without properly shutting down RoCE and freeing all associated resources, it results in the ARM core to hang immediately after the 'reboot' By always invoking 'bnxt_ulp_shutdown' in 'bnxt_shutdown', it fixes the above issue Fixes: 0efd2fc65c92 ("bnxt_en: Add a callback to inform RDMA driver during PCI shutdown.") Signed-off-by: Ray Jui Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c5c38d4b7d1c..7f173eb42aa2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8263,8 +8263,9 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (netif_running(dev)) dev_close(dev); + bnxt_ulp_shutdown(bp); + if (system_state == SYSTEM_POWER_OFF) { - bnxt_ulp_shutdown(bp); bnxt_clear_int_mode(bp); pci_wake_from_d3(pdev, bp->wol); pci_set_power_state(pdev, PCI_D3hot); -- cgit v1.2.3 From c8fb7b8259c67b86cd93a71c85e78b34d2c96fdc Mon Sep 17 00:00:00 2001 From: Sunil Challa Date: Fri, 1 Dec 2017 03:13:03 -0500 Subject: bnxt_en: wildcard smac while creating tunnel decap filter While creating a decap filter the tunnel smac need not (and must not) be specified as we cannot ascertain the neighbor in the recv path. 'ttl' match is also not needed for the decap filter and must be wild-carded. Fixes: f484f6782e01 ("bnxt_en: add hwrm FW cmds for cfa_encap_record and decap_filter") Signed-off-by: Sunil Challa Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index d5031f436f83..96bff48af971 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -532,10 +532,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, } if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) { - enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR | - CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR; + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR; ether_addr_copy(req.dst_macaddr, l2_info->dmac); - ether_addr_copy(req.src_macaddr, l2_info->smac); } if (l2_info->num_vlans) { enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID; @@ -1012,10 +1010,9 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, if (rc) goto put_decap; - decap_key->ttl = tun_key.ttl; decap_l2_info = &decap_node->l2_info; + /* decap smac is wildcarded */ ether_addr_copy(decap_l2_info->dmac, l2_info.smac); - ether_addr_copy(decap_l2_info->smac, l2_info.dmac); if (l2_info.num_vlans) { decap_l2_info->num_vlans = l2_info.num_vlans; decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid; -- cgit v1.2.3 From e9ecc731a87912d209d6e9b4ed20ed70451c08cb Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Fri, 1 Dec 2017 03:13:04 -0500 Subject: bnxt_en: fix dst/src fid for vxlan encap/decap actions For flows that involve a vxlan encap action, the vxlan sock interface may be specified as the outgoing interface. The driver must resolve the outgoing PF interface used by this socket and use the dst_fid of the PF in the hwrm_cfa_encap_record_alloc cmd. Similarily for flows that have a vxlan decap action, the fid of the incoming PF interface must be used as the src_fid in the hwrm_cfa_decap_filter_alloc cmd. Fixes: 8c95f773b4a3 ("bnxt_en: add support for Flower based vxlan encap/decap offload") Signed-off-by: Sathya Perla Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 48 +++++++++++++++------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 96bff48af971..3d201d7324bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -56,7 +56,6 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, { int ifindex = tcf_mirred_ifindex(tc_act); struct net_device *dev; - u16 dst_fid; dev = __dev_get_by_index(dev_net(bp->dev), ifindex); if (!dev) { @@ -64,15 +63,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, return -EINVAL; } - /* find the FID from dev */ - dst_fid = bnxt_flow_get_dst_fid(bp, dev); - if (dst_fid == BNXT_FID_INVALID) { - netdev_info(bp->dev, "can't get fid for ifindex=%d", ifindex); - return -EINVAL; - } - actions->flags |= BNXT_TC_ACTION_FLAG_FWD; - actions->dst_fid = dst_fid; actions->dst_dev = dev; return 0; } @@ -160,13 +151,17 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, if (rc) return rc; - /* Tunnel encap/decap action must be accompanied by a redirect action */ - if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP || - actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) && - !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) { - netdev_info(bp->dev, - "error: no redir action along with encap/decap"); - return -EINVAL; + if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) { + if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { + /* dst_fid is PF's fid */ + actions->dst_fid = bp->pf.fw_fid; + } else { + /* find the FID from dst_dev */ + actions->dst_fid = + bnxt_flow_get_dst_fid(bp, actions->dst_dev); + if (actions->dst_fid == BNXT_FID_INVALID) + return -EINVAL; + } } return rc; @@ -899,10 +894,10 @@ static void bnxt_tc_put_decap_handle(struct bnxt *bp, static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, struct ip_tunnel_key *tun_key, - struct bnxt_tc_l2_key *l2_info, - struct net_device *real_dst_dev) + struct bnxt_tc_l2_key *l2_info) { #ifdef CONFIG_INET + struct net_device *real_dst_dev = bp->dev; struct flowi4 flow = { {0} }; struct net_device *dst_dev; struct neighbour *nbr; @@ -1006,7 +1001,7 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, */ tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src; tun_key.tp_dst = flow->tun_key.tp_dst; - rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev); + rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info); if (rc) goto put_decap; @@ -1092,8 +1087,7 @@ static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) goto done; - rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info, - flow->actions.dst_dev); + rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info); if (rc) goto put_encap; @@ -1166,6 +1160,15 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, return 0; } +static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow, + u16 src_fid) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + flow->src_fid = bp->pf.fw_fid; + else + flow->src_fid = src_fid; +} + /* Add a new flow or replace an existing flow. * Notes on locking: * There are essentially two critical sections here. @@ -1201,7 +1204,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow); if (rc) goto free_node; - flow->src_fid = src_fid; + + bnxt_tc_set_src_fid(bp, flow, src_fid); if (!bnxt_tc_can_offload(bp, flow)) { rc = -ENOSPC; -- cgit v1.2.3 From ebd5818cc5d4847897d7fe872e2d9799d7b7fcbb Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 1 Dec 2017 03:13:05 -0500 Subject: bnxt_en: Fix a variable scoping in bnxt_hwrm_do_send_msg() short_input variable is assigned to another data pointer which is referred out of its scope. Fix it by moving short_input definition to the beginning of bnxt_hwrm_do_send_msg() function. No failure has been reported so far due to this issue. Fixes: e605db801bde ("bnxt_en: Support for Short Firmware Message") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7f173eb42aa2..28f5e94274ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3368,6 +3368,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, u16 cp_ring_id, len = 0; struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr; u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN; + struct hwrm_short_input short_input = {0}; req->seq_id = cpu_to_le16(bp->hwrm_cmd_seq++); memset(resp, 0, PAGE_SIZE); @@ -3376,7 +3377,6 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (bp->flags & BNXT_FLAG_SHORT_CMD) { void *short_cmd_req = bp->hwrm_short_cmd_req_addr; - struct hwrm_short_input short_input = {0}; memcpy(short_cmd_req, req, msg_len); memset(short_cmd_req + msg_len, 0, BNXT_HWRM_MAX_REQ_LEN - -- cgit v1.2.3 From 6e474083f3daf3a3546737f5d7d502ad12eb257c Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 1 Dec 2017 05:10:36 -0500 Subject: vhost: fix skb leak in handle_rx() Matthew found a roughly 40% tcp throughput regression with commit c67df11f(vhost_net: try batch dequing from skb array) as discussed in the following thread: https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html Eventually we figured out that it was a skb leak in handle_rx() when sending packets to the VM. This usually happens when a guest can not drain out vq as fast as vhost fills in, afterwards it sets off the traffic jam and leaks skb(s) which occurs as no headcount to send on the vq from vhost side. This can be avoided by making sure we have got enough headcount before actually consuming a skb from the batched rx array while transmitting, which is simply done by moving checking the zero headcount a bit ahead. Signed-off-by: Wei Xu Reported-by: Matthew Rosato Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/net.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8d626d7c2e7e..c7bdeb655646 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -778,16 +778,6 @@ static void handle_rx(struct vhost_net *net) /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) goto out; - if (nvq->rx_array) - msg.msg_control = vhost_net_buf_consume(&nvq->rxq); - /* On overrun, truncate and discard */ - if (unlikely(headcount > UIO_MAXIOV)) { - iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); - err = sock->ops->recvmsg(sock, &msg, - 1, MSG_DONTWAIT | MSG_TRUNC); - pr_debug("Discarded rx packet: len %zd\n", sock_len); - continue; - } /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(vhost_enable_notify(&net->dev, vq))) { @@ -800,6 +790,16 @@ static void handle_rx(struct vhost_net *net) * they refilled. */ goto out; } + if (nvq->rx_array) + msg.msg_control = vhost_net_buf_consume(&nvq->rxq); + /* On overrun, truncate and discard */ + if (unlikely(headcount > UIO_MAXIOV)) { + iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); + err = sock->ops->recvmsg(sock, &msg, + 1, MSG_DONTWAIT | MSG_TRUNC); + pr_debug("Discarded rx packet: len %zd\n", sock_len); + continue; + } /* We don't need to be notified again. */ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); fixup = msg.msg_iter; -- cgit v1.2.3 From c33ee15b3820a03cf8229ba9415084197b827f8c Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 1 Dec 2017 05:10:37 -0500 Subject: tun: free skb in early errors tun_recvmsg() supports accepting skb by msg_control after commit ac77cfd4258f ("tun: support receiving skb through msg_control"), the skb if presented should be freed no matter how far it can go along, otherwise it would be leaked. This patch fixes several missed cases. Signed-off-by: Wei Xu Reported-by: Matthew Rosato Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 95749006d687..4f4a842a1c9c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1952,8 +1952,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, tun_debug(KERN_INFO, tun, "tun_do_read\n"); - if (!iov_iter_count(to)) + if (!iov_iter_count(to)) { + if (skb) + kfree_skb(skb); return 0; + } if (!skb) { /* Read frames from ring */ @@ -2069,22 +2072,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, { struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); + struct sk_buff *skb = m->msg_control; int ret; - if (!tun) - return -EBADFD; + if (!tun) { + ret = -EBADFD; + goto out_free_skb; + } if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { ret = -EINVAL; - goto out; + goto out_put_tun; } if (flags & MSG_ERRQUEUE) { ret = sock_recv_errqueue(sock->sk, m, total_len, SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, - m->msg_control); + ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb); if (ret > (ssize_t)total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; @@ -2092,6 +2097,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, out: tun_put(tun); return ret; + +out_put_tun: + tun_put(tun); +out_free_skb: + if (skb) + kfree_skb(skb); + return ret; } static int tun_peek_len(struct socket *sock) -- cgit v1.2.3 From 61d78537843e676e7f56ac6db333db0c0529b892 Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 1 Dec 2017 05:10:38 -0500 Subject: tap: free skb if flags error tap_recvmsg() supports accepting skb by msg_control after commit 3b4ba04acca8 ("tap: support receiving skb from msg_control"), the skb if presented should be freed within the function, otherwise it would be leaked. Signed-off-by: Wei Xu Reported-by: Matthew Rosato Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index e9489b88407c..0a886fda0129 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q, DEFINE_WAIT(wait); ssize_t ret = 0; - if (!iov_iter_count(to)) + if (!iov_iter_count(to)) { + if (skb) + kfree_skb(skb); return 0; + } if (skb) goto put; @@ -1154,11 +1157,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); + struct sk_buff *skb = m->msg_control; int ret; - if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) { + if (skb) + kfree_skb(skb); return -EINVAL; - ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, - m->msg_control); + } + ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; -- cgit v1.2.3 From bc3ab70584696cb798b9e1e0ac8e6ced5fd4c3b8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:49 +0100 Subject: s390/qeth: fix thinko in IPv4 multicast address tracking Commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") reworked how secondary addresses are managed for qeth devices. Instead of dropping & subsequently re-adding all addresses on every ndo_set_rx_mode() call, qeth now keeps track of the addresses that are currently registered with the HW. On a ndo_set_rx_mode(), we thus only need to do (de-)registration requests for the addresses that have actually changed. On L3 devices, the lookup for IPv4 Multicast addresses checks the wrong hashtable - and thus never finds a match. As a result, we first delete *all* such addresses, and then re-add them again. So each set_rx_mode() causes a short period where the IPv4 Multicast addresses are not registered, and the card stops forwarding inbound traffic for them. Fix this by setting the ->is_multicast flag on the lookup object, thus enabling qeth_l3_ip_from_hash() to search the correct hashtable and find a match there. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index aadd384316a3..e79936b50698 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1376,6 +1376,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev) tmp->u.a4.addr = be32_to_cpu(im4->multiaddr); memcpy(tmp->mac, buf, sizeof(tmp->mac)); + tmp->is_multicast = 1; ipm = qeth_l3_ip_from_hash(card, tmp); if (ipm) { -- cgit v1.2.3 From 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:50 +0100 Subject: s390/qeth: fix GSO throughput regression Using GSO with small MTUs currently results in a substantial throughput regression - which is caused by how qeth needs to map non-linear skbs into its IO buffer elements: compared to a linear skb, each GSO-segmented skb effectively consumes twice as many buffer elements (ie two instead of one) due to the additional header-only part. This causes the Output Queue to be congested with low-utilized IO buffers. Fix this as follows: If the MSS is low enough so that a non-SG GSO segmentation produces order-0 skbs (currently ~3500 byte), opt out from NETIF_F_SG. This is where we anticipate the biggest savings, since an SG-enabled GSO segmentation produces skbs that always consume at least two buffer elements. Larger MSS values continue to get a SG-enabled GSO segmentation, since 1) the relative overhead of the additional header-only buffer element becomes less noticeable, and 2) the linearization overhead increases. With the throughput regression fixed, re-enable NETIF_F_SG by default to reap the significant CPU savings of GSO. Fixes: 5722963a8e83 ("qeth: do not turn on SG per default") Reported-by: Nils Hoppmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 +++ drivers/s390/net/qeth_core_main.c | 31 +++++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 2 ++ drivers/s390/net/qeth_l3_main.c | 2 ++ 4 files changed, 38 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 9cd569ef43ec..15015a24f8ad 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -987,6 +987,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, int qeth_set_features(struct net_device *, netdev_features_t); void qeth_recover_features(struct net_device *dev); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); int qeth_vm_request_mac(struct qeth_card *card); int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 49b9efeba1bd..d9b0e07d4fa7 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -19,6 +19,11 @@ #include #include #include +#include +#include +#include +#include + #include #include @@ -6438,6 +6443,32 @@ netdev_features_t qeth_fix_features(struct net_device *dev, } EXPORT_SYMBOL_GPL(qeth_fix_features); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* GSO segmentation builds skbs with + * a (small) linear part for the headers, and + * page frags for the data. + * Compared to a linear skb, the header-only part consumes an + * additional buffer element. This reduces buffer utilization, and + * hurts throughput. So compress small segments into one element. + */ + if (netif_needs_gso(skb, features)) { + /* match skb_segment(): */ + unsigned int doffset = skb->data - skb_mac_header(skb); + unsigned int hsize = skb_shinfo(skb)->gso_size; + unsigned int hroom = skb_headroom(skb); + + /* linearize only if resulting skb allocations are order-0: */ + if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0)) + features &= ~NETIF_F_SG; + } + + return vlan_features_check(skb, features); +} +EXPORT_SYMBOL_GPL(qeth_features_check); + static int __init qeth_core_init(void) { int rc; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d2537c09126d..85162712d207 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -960,6 +960,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_stop = qeth_l2_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l2_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, .ndo_do_ioctl = qeth_do_ioctl, @@ -1010,6 +1011,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_SG; card->dev->vlan_features = NETIF_F_SG; + card->dev->features |= NETIF_F_SG; /* OSA 3S and earlier has no RX/TX support */ if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) { card->dev->hw_features |= NETIF_F_IP_CSUM; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index e79936b50698..46a841258fc8 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2918,6 +2918,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_stop = qeth_l3_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_multicast_list, .ndo_do_ioctl = qeth_do_ioctl, @@ -2958,6 +2959,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; + card->dev->features |= NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { -- cgit v1.2.3 From 0cbff6d4546613330a1c5f139f5c368e4ce33ca1 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:51 +0100 Subject: s390/qeth: build max size GSO skbs on L2 devices The current GSO skb size limit was copy&pasted over from the L3 path, where it is needed due to a TSO limitation. As L2 devices don't offer TSO support (and thus all GSO skbs are segmented before they reach the driver), there's no reason to restrict the stack in how large it may build the GSO skbs. Fixes: d52aec97e5bc ("qeth: enable scatter/gather in layer 2 mode") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 2 -- drivers/s390/net/qeth_l3_main.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 85162712d207..f21c94810373 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1030,8 +1030,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 46a841258fc8..2a25f20566d8 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2987,8 +2987,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; netif_keep_dst(card->dev); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; + netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * + PAGE_SIZE); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); -- cgit v1.2.3 From a98a4ebc8c61d20f0150d6be66e0e65223a347af Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 1 Dec 2017 09:58:42 +0800 Subject: ipvlan: Add the skb->mark as flow4's member to lookup route Current codes don't use skb->mark to assign flowi4_mark, it would make the policy route rule with fwmark doesn't work as expected. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 11c1e7950fe5..77cc4fbaeace 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -393,6 +393,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) .flowi4_oif = dev->ifindex, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, .daddr = ip4h->daddr, .saddr = ip4h->saddr, }; -- cgit v1.2.3 From 45ab4b13e46325d00f4acdb365d406e941a15f81 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Fri, 1 Dec 2017 11:12:44 +0100 Subject: stmmac: reset last TSO segment size after device open The mss variable tracks the last max segment size sent to the TSO engine. We do not update the hardware as long as we receive skb:s with the same value in gso_size. During a network device down/up cycle (mapped to stmmac_release() and stmmac_open() callbacks) we issue a reset to the hardware and it forgets the setting for mss. However we did not zero out our mss variable so the next transmission of a gso packet happens with an undefined hardware setting. This triggers a hang in the TSO engine and eventuelly the netdev watchdog will bark. Fixes: f748be531d70 ("stmmac: support new GMAC4") Signed-off-by: Lars Persson Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f63c2ddced3c..d7250539d0bd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2588,6 +2588,7 @@ static int stmmac_open(struct net_device *dev) priv->dma_buf_sz = STMMAC_ALIGN(buf_sz); priv->rx_copybreak = STMMAC_RX_COPYBREAK; + priv->mss = 0; ret = alloc_dma_desc_resources(priv); if (ret < 0) { -- cgit v1.2.3 From 886afc1dc489436bf2c4fadf0f3aecacd7269234 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 1 Dec 2017 12:38:11 +0000 Subject: liquidio: fix incorrect indentation of assignment statement Remove one extraneous level of indentation on assignment statement. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 6aa0eee88ea5..a5eecd895a82 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1113,7 +1113,7 @@ static int liquidio_watchdog(void *param) dev_err(&oct->pci_dev->dev, "ERROR: Octeon core %d crashed or got stuck! See oct-fwdump for details.\n", core); - err_msg_was_printed[core] = true; + err_msg_was_printed[core] = true; } } -- cgit v1.2.3 From c501256406fb19c306504ee1fe41a4ea208d4245 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:09:53 +0000 Subject: rxrpc: Use correct netns source in rxrpc_release_sock() In rxrpc_release_sock() there may be no rx->local value to access, so we can't unconditionally follow it to the rxrpc network namespace information to poke the connection reapers. Instead, use the socket's namespace pointer to find the namespace. This unfixed code causes the following static checker warning: net/rxrpc/af_rxrpc.c:898 rxrpc_release_sock() error: we previously assumed 'rx->local' could be null (see line 887) Fixes: 3d18cbb7fd0c ("rxrpc: Fix conn expiry timers") Reported-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8f7cf4c042be..dcd818fa837e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -860,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk) static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); @@ -895,8 +896,8 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); - rxrpc_queue_work(&rx->local->rxnet->service_conn_reaper); - rxrpc_queue_work(&rx->local->rxnet->client_conn_reaper); + rxrpc_queue_work(&rxnet->service_conn_reaper); + rxrpc_queue_work(&rxnet->client_conn_reaper); rxrpc_put_local(rx->local); rx->local = NULL; -- cgit v1.2.3 From bcd1d601e5cc760bf5743a59e4716603490e281c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:10:37 +0000 Subject: rxrpc: Fix the MAINTAINERS record Fix the MAINTAINERS record so that it's more obvious who the maintainer for AF_RXRPC is. Reported-by: Joe Perches Reported-by: David Miller Signed-off-by: David Howells Signed-off-by: David S. Miller --- MAINTAINERS | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 77d819b458a9..511b858405bc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -554,13 +554,13 @@ S: Orphan F: Documentation/filesystems/affs.txt F: fs/affs/ -AFS FILESYSTEM & AF_RXRPC SOCKET DOMAIN +AFS FILESYSTEM M: David Howells L: linux-afs@lists.infradead.org S: Supported F: fs/afs/ -F: include/net/af_rxrpc.h -F: net/rxrpc/af_rxrpc.c +F: include/trace/events/afs.h +F: Documentation/filesystems/afs.txt W: https://www.infradead.org/~dhowells/kafs/ AGPGART DRIVER @@ -11777,6 +11777,18 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-deve S: Maintained F: drivers/net/wireless/realtek/rtl8xxxu/ +RXRPC SOCKETS (AF_RXRPC) +M: David Howells +L: linux-afs@lists.infradead.org +S: Supported +F: net/rxrpc/ +F: include/keys/rxrpc-type.h +F: include/net/af_rxrpc.h +F: include/trace/events/rxrpc.h +F: include/uapi/linux/rxrpc.h +F: Documentation/networking/rxrpc.txt +W: https://www.infradead.org/~dhowells/kafs/ + S3 SAVAGE FRAMEBUFFER DRIVER M: Antonino Daplas L: linux-fbdev@vger.kernel.org -- cgit v1.2.3 From ae64f9bd1d3621b5e60d7363bc20afb46aede215 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Dec 2017 11:01:47 -0500 Subject: Linux 4.15-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f761bf475ba5..c988e46a53cd 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From eeea10b83a139451130df1594f26710c8fa390c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Dec 2017 09:32:59 -0800 Subject: tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb() James Morris reported kernel stack corruption bug [1] while running the SELinux testsuite, and bisected to a recent commit bffa72cf7f9d ("net: sk_buff rbnode reorg") We believe this commit is fine, but exposes an older bug. SELinux code runs from tcp_filter() and might send an ICMP, expecting IP options to be found in skb->cb[] using regular IPCB placement. We need to defer TCP mangling of skb->cb[] after tcp_filter() calls. This patch adds tcp_v4_fill_cb()/tcp_v4_restore_cb() in a very similar way we added them for IPv6. [1] [ 339.806024] SELinux: failure in selinux_parse_skb(), unable to parse packet [ 339.822505] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff81745af5 [ 339.822505] [ 339.852250] CPU: 4 PID: 3642 Comm: client Not tainted 4.15.0-rc1-test #15 [ 339.868498] Hardware name: LENOVO 10FGS0VA1L/30BC, BIOS FWKT68A 01/19/2017 [ 339.885060] Call Trace: [ 339.896875] [ 339.908103] dump_stack+0x63/0x87 [ 339.920645] panic+0xe8/0x248 [ 339.932668] ? ip_push_pending_frames+0x33/0x40 [ 339.946328] ? icmp_send+0x525/0x530 [ 339.958861] ? kfree_skbmem+0x60/0x70 [ 339.971431] __stack_chk_fail+0x1b/0x20 [ 339.984049] icmp_send+0x525/0x530 [ 339.996205] ? netlbl_skbuff_err+0x36/0x40 [ 340.008997] ? selinux_netlbl_err+0x11/0x20 [ 340.021816] ? selinux_socket_sock_rcv_skb+0x211/0x230 [ 340.035529] ? security_sock_rcv_skb+0x3b/0x50 [ 340.048471] ? sk_filter_trim_cap+0x44/0x1c0 [ 340.061246] ? tcp_v4_inbound_md5_hash+0x69/0x1b0 [ 340.074562] ? tcp_filter+0x2c/0x40 [ 340.086400] ? tcp_v4_rcv+0x820/0xa20 [ 340.098329] ? ip_local_deliver_finish+0x71/0x1a0 [ 340.111279] ? ip_local_deliver+0x6f/0xe0 [ 340.123535] ? ip_rcv_finish+0x3a0/0x3a0 [ 340.135523] ? ip_rcv_finish+0xdb/0x3a0 [ 340.147442] ? ip_rcv+0x27c/0x3c0 [ 340.158668] ? inet_del_offload+0x40/0x40 [ 340.170580] ? __netif_receive_skb_core+0x4ac/0x900 [ 340.183285] ? rcu_accelerate_cbs+0x5b/0x80 [ 340.195282] ? __netif_receive_skb+0x18/0x60 [ 340.207288] ? process_backlog+0x95/0x140 [ 340.218948] ? net_rx_action+0x26c/0x3b0 [ 340.230416] ? __do_softirq+0xc9/0x26a [ 340.241625] ? do_softirq_own_stack+0x2a/0x40 [ 340.253368] [ 340.262673] ? do_softirq+0x50/0x60 [ 340.273450] ? __local_bh_enable_ip+0x57/0x60 [ 340.285045] ? ip_finish_output2+0x175/0x350 [ 340.296403] ? ip_finish_output+0x127/0x1d0 [ 340.307665] ? nf_hook_slow+0x3c/0xb0 [ 340.318230] ? ip_output+0x72/0xe0 [ 340.328524] ? ip_fragment.constprop.54+0x80/0x80 [ 340.340070] ? ip_local_out+0x35/0x40 [ 340.350497] ? ip_queue_xmit+0x15c/0x3f0 [ 340.361060] ? __kmalloc_reserve.isra.40+0x31/0x90 [ 340.372484] ? __skb_clone+0x2e/0x130 [ 340.382633] ? tcp_transmit_skb+0x558/0xa10 [ 340.393262] ? tcp_connect+0x938/0xad0 [ 340.403370] ? ktime_get_with_offset+0x4c/0xb0 [ 340.414206] ? tcp_v4_connect+0x457/0x4e0 [ 340.424471] ? __inet_stream_connect+0xb3/0x300 [ 340.435195] ? inet_stream_connect+0x3b/0x60 [ 340.445607] ? SYSC_connect+0xd9/0x110 [ 340.455455] ? __audit_syscall_entry+0xaf/0x100 [ 340.466112] ? syscall_trace_enter+0x1d0/0x2b0 [ 340.476636] ? __audit_syscall_exit+0x209/0x290 [ 340.487151] ? SyS_connect+0xe/0x10 [ 340.496453] ? do_syscall_64+0x67/0x1b0 [ 340.506078] ? entry_SYSCALL64_slow_path+0x25/0x25 Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses") Signed-off-by: Eric Dumazet Reported-by: James Morris Tested-by: James Morris Tested-by: Casey Schaufler Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 59 ++++++++++++++++++++++++++++++++++++----------------- net/ipv6/tcp_ipv6.c | 10 +++++---- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c6bc0c4d19c6..77ea45da0fe9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_filter); +static void tcp_v4_restore_cb(struct sk_buff *skb) +{ + memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4, + sizeof(struct inet_skb_parm)); +} + +static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, + const struct tcphdr *th) +{ + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() + * barrier() makes sure compiler wont play fool^Waliasing games. + */ + memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), + sizeof(struct inet_skb_parm)); + barrier(); + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + skb->len - th->doff * 4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_tw_isn = 0; + TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->has_rxtstamp = + skb->tstamp || skb_hwtstamps(skb)->hwtstamp; +} + /* * From tcp_input.c */ @@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); - /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() - * barrier() makes sure compiler wont play fool^Waliasing games. - */ - memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), - sizeof(struct inet_skb_parm)); - barrier(); - - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff * 4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; - TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); - TCP_SKB_CB(skb)->sacked = 0; - TCP_SKB_CB(skb)->has_rxtstamp = - skb->tstamp || skb_hwtstamps(skb)->hwtstamp; - lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted); @@ -1679,14 +1689,19 @@ process: sock_hold(sk); refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) + if (!tcp_filter(sk, skb)) { + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); + tcp_v4_fill_cb(skb, iph, th); nsk = tcp_check_req(sk, skb, req, false); + } if (!nsk) { reqsk_put(req); goto discard_and_relse; } if (nsk == sk) { reqsk_put(req); + tcp_v4_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; @@ -1712,6 +1727,7 @@ process: goto discard_and_relse; th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); + tcp_v4_fill_cb(skb, iph, th); skb->dev = NULL; @@ -1742,6 +1758,8 @@ no_tcp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; + tcp_v4_fill_cb(skb, iph, th); + if (tcp_checksum_complete(skb)) { csum_error: __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); @@ -1768,6 +1786,8 @@ do_time_wait: goto discard_it; } + tcp_v4_fill_cb(skb, iph, th); + if (tcp_checksum_complete(skb)) { inet_twsk_put(inet_twsk(sk)); goto csum_error; @@ -1784,6 +1804,7 @@ do_time_wait: if (sk2) { inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; + tcp_v4_restore_cb(skb); refcounted = false; goto process; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index be11dc13aa70..1f04ec0e4a7a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1454,7 +1454,6 @@ process: struct sock *nsk; sk = req->rsk_listener; - tcp_v6_fill_cb(skb, hdr, th); if (tcp_v6_inbound_md5_hash(sk, skb)) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1467,8 +1466,12 @@ process: sock_hold(sk); refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) + if (!tcp_filter(sk, skb)) { + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); + tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false); + } if (!nsk) { reqsk_put(req); goto discard_and_relse; @@ -1492,8 +1495,6 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - tcp_v6_fill_cb(skb, hdr, th); - if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; @@ -1501,6 +1502,7 @@ process: goto discard_and_relse; th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); + tcp_v6_fill_cb(skb, hdr, th); skb->dev = NULL; -- cgit v1.2.3 From b4d1605a8ea608fd7dc45b926a05d75d340bde4b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 3 Dec 2017 09:33:00 -0800 Subject: tcp: use IPCB instead of TCP_SKB_CB in inet_exact_dif_match() After this fix : ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"), socket lookups happen while skb->cb[] has not been mangled yet by TCP. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Signed-off-by: David Ahern Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4e09398009c1..6998707e81f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -844,12 +844,11 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) } #endif -/* TCP_SKB_CB reference means this can not be used from early demux */ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) return true; #endif return false; -- cgit v1.2.3 From 9956cfef3409177d9e24ea4b7910148a18073a6f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 3 Dec 2017 13:15:39 -0800 Subject: Documentation: fix docs build error after source file removed The pci/htirq.c file was removed so remove it from the documentation file also. Error: Cannot open file ../drivers/pci/htirq.c WARNING: kernel-doc '../scripts/kernel-doc -rst -enable-lineno -export ../drivers/pci/htirq.c' failed with return code 2 Fixes: fd2fa6c18b72 ("x86/PCI: Remove unused HyperTransport interrupt support") Signed-off-by: Randy Dunlap Signed-off-by: Jonathan Corbet --- Documentation/driver-api/pci.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/driver-api/pci.rst b/Documentation/driver-api/pci.rst index 01a6c8b7d3a7..ca85e5e78b2c 100644 --- a/Documentation/driver-api/pci.rst +++ b/Documentation/driver-api/pci.rst @@ -25,9 +25,6 @@ PCI Support Library .. kernel-doc:: drivers/pci/irq.c :export: -.. kernel-doc:: drivers/pci/htirq.c - :export: - .. kernel-doc:: drivers/pci/probe.c :export: -- cgit v1.2.3 From 8ee5ad1d4c0ba93f96d0db31e98102e61ff7d12b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 4 Dec 2017 10:52:40 -0500 Subject: arch/tile: mark as orphaned The chip family of TILEPro and TILE-Gx was developed by Tilera, which was eventually acquired by Mellanox. The tile architecture was added to the kernel in 2010 and first appeared in 2.6.36. Now at Mellanox we are developing new chips based on the ARM64 architecture; our last TILE-Gx chip (the Gx72) was released in 2013, and our customers using tile architecture products are not, as far as we know, looking to upgrade to newer kernel releases. In the absence of someone in the community stepping up to take over maintainership, this commit marks the architecture as orphaned. Cc: Chris Metcalf Signed-off-by: Chris Metcalf Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d4fdcb12616c..603d49e877a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13647,10 +13647,8 @@ F: drivers/net/wireless/ti/ F: include/linux/wl12xx.h TILE ARCHITECTURE -M: Chris Metcalf W: http://www.mellanox.com/repository/solutions/tile-scm/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile.git -S: Supported +S: Orphan F: arch/tile/ F: drivers/char/tile-srom.c F: drivers/edac/tile_edac.c -- cgit v1.2.3 From b7ad7ef742a99c148631d38a98da72e7db8f6fd0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Dec 2017 08:23:20 -0800 Subject: remove task and stack pointer printout from oops dump Geert Uytterhoeven reported a NFS oops, and pointed out that some of the numbers were hashed and useless. We could just turn them from '%p' into '%px', but those numbers are really just legacy, and useless even when not hashed. So just remove them entirely. Reported-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5d81206a572d..b9006617710f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3141,9 +3141,6 @@ void dump_stack_print_info(const char *log_lvl) void show_regs_print_info(const char *log_lvl) { dump_stack_print_info(log_lvl); - - printk("%stask: %p task.stack: %p\n", - log_lvl, current, task_stack_page(current)); } #endif -- cgit v1.2.3