diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 40 | ||||
-rw-r--r-- | fs/btrfs/ref-verify.c | 2 | ||||
-rw-r--r-- | fs/btrfs/space-info.c | 2 | ||||
-rw-r--r-- | fs/btrfs/super.c | 6 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 2 | ||||
-rw-r--r-- | fs/cifs/cifs_debug.c | 6 | ||||
-rw-r--r-- | fs/cifs/connect.c | 10 | ||||
-rw-r--r-- | fs/cifs/inode.c | 10 | ||||
-rw-r--r-- | fs/cifs/transport.c | 2 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 5 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 10 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 1 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 3 | ||||
-rw-r--r-- | fs/gfs2/log.c | 25 | ||||
-rw-r--r-- | fs/gfs2/log.h | 4 | ||||
-rw-r--r-- | fs/gfs2/main.c | 1 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 13 | ||||
-rw-r--r-- | fs/gfs2/recovery.c | 4 | ||||
-rw-r--r-- | fs/gfs2/super.c | 20 | ||||
-rw-r--r-- | fs/io_uring.c | 29 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 2 |
25 files changed, 144 insertions, 77 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3a7648bff42c..82ab6e5a386d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1196,7 +1196,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, switch (tm->op) { case MOD_LOG_KEY_REMOVE_WHILE_FREEING: BUG_ON(tm->slot < n); - /* Fallthrough */ + fallthrough; case MOD_LOG_KEY_REMOVE_WHILE_MOVING: case MOD_LOG_KEY_REMOVE: btrfs_set_node_key(eb, &tm->key, tm->slot); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7c6f0bbb54a5..b1a148058773 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2593,10 +2593,12 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) !extent_buffer_uptodate(tree_root->node)) { handle_error = true; - if (IS_ERR(tree_root->node)) + if (IS_ERR(tree_root->node)) { ret = PTR_ERR(tree_root->node); - else if (!extent_buffer_uptodate(tree_root->node)) + tree_root->node = NULL; + } else if (!extent_buffer_uptodate(tree_root->node)) { ret = -EUCLEAN; + } btrfs_warn(fs_info, "failed to read tree root"); continue; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68c96057ad2d..608f93438b29 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5058,25 +5058,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; - /* the ref bit is tricky. We have to make sure it is set - * if we have the buffer dirty. Otherwise the - * code to free a buffer can end up dropping a dirty - * page + /* + * The TREE_REF bit is first set when the extent_buffer is added + * to the radix tree. It is also reset, if unset, when a new reference + * is created by find_extent_buffer. * - * Once the ref bit is set, it won't go away while the - * buffer is dirty or in writeback, and it also won't - * go away while we have the reference count on the - * eb bumped. + * It is only cleared in two cases: freeing the last non-tree + * reference to the extent_buffer when its STALE bit is set or + * calling releasepage when the tree reference is the only reference. * - * We can't just set the ref bit without bumping the - * ref on the eb because free_extent_buffer might - * see the ref bit and try to clear it. If this happens - * free_extent_buffer might end up dropping our original - * ref by mistake and freeing the page before we are able - * to add one more ref. + * In both cases, care is taken to ensure that the extent_buffer's + * pages are not under io. However, releasepage can be concurrently + * called with creating new references, which is prone to race + * conditions between the calls to check_buffer_tree_ref in those + * codepaths and clearing TREE_REF in try_release_extent_buffer. * - * So bump the ref count first, then set the bit. If someone - * beat us to it, drop the ref we added. + * The actual lifetime of the extent_buffer in the radix tree is + * adequately protected by the refcount, but the TREE_REF bit and + * its corresponding reference are not. To protect against this + * class of races, we call check_buffer_tree_ref from the codepaths + * which trigger io after they set eb->io_pages. Note that once io is + * initiated, TREE_REF can no longer be cleared, so that is the + * moment at which any such race is best fixed. */ refs = atomic_read(&eb->refs); if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) @@ -5527,6 +5530,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); + /* + * It is possible for releasepage to clear the TREE_REF bit before we + * set io_pages. See check_buffer_tree_ref for a more detailed comment. + */ + check_buffer_tree_ref(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 7887317033c9..af92525dbb16 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -509,7 +509,7 @@ static int process_leaf(struct btrfs_root *root, switch (key.type) { case BTRFS_EXTENT_ITEM_KEY: *num_bytes = key.offset; - /* fall through */ + fallthrough; case BTRFS_METADATA_ITEM_KEY: *bytenr = key.objectid; ret = process_extent_item(fs_info, path, &key, i, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 41ee88633769..c7bd3fdd7792 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -879,8 +879,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, return false; } global_rsv->reserved -= ticket->bytes; + remove_ticket(space_info, ticket); ticket->bytes = 0; - list_del_init(&ticket->list); wake_up(&ticket->wait); space_info->tickets_id++; if (global_rsv->reserved < global_rsv->size) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bc73fd670702..c3826ae883f0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -523,7 +523,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_compress_force: case Opt_compress_force_type: compress_force = true; - /* Fallthrough */ + fallthrough; case Opt_compress: case Opt_compress_type: saved_compress_type = btrfs_test_opt(info, @@ -622,7 +622,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_opt(info->mount_opt, NOSSD); btrfs_clear_and_info(info, SSD, "not using ssd optimizations"); - /* Fallthrough */ + fallthrough; case Opt_nossd_spread: btrfs_clear_and_info(info, SSD_SPREAD, "not using spread ssd allocation scheme"); @@ -793,7 +793,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_recovery: btrfs_warn(info, "'recovery' is deprecated, use 'usebackuproot' instead"); - /* fall through */ + fallthrough; case Opt_usebackuproot: btrfs_info(info, "trying to use backup root at mount time"); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f067b5934c46..75af2334b2e3 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -408,7 +408,7 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) return BTRFS_MAP_WRITE; default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case REQ_OP_READ: return BTRFS_MAP_READ; } diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index fc98b97b396a..53588d7517b4 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -399,6 +399,10 @@ skip_rdma: if (ses->sign) seq_puts(m, " signed"); + seq_printf(m, "\n\tUser: %d Cred User: %d", + from_kuid(&init_user_ns, ses->linux_uid), + from_kuid(&init_user_ns, ses->cred_uid)); + if (ses->chan_count > 1) { seq_printf(m, "\n\n\tExtra Channels: %zu\n", ses->chan_count-1); @@ -406,7 +410,7 @@ skip_rdma: cifs_dump_channel(m, j, &ses->chans[j]); } - seq_puts(m, "\n\tShares:"); + seq_puts(m, "\n\n\tShares:"); j = 0; seq_printf(m, "\n\t%d) IPC: ", j); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5fac34f192af..a61abde09ffe 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -5306,9 +5306,15 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) vol_info->nocase = master_tcon->nocase; vol_info->nohandlecache = master_tcon->nohandlecache; vol_info->local_lease = master_tcon->local_lease; + vol_info->no_lease = master_tcon->no_lease; + vol_info->resilient = master_tcon->use_resilient; + vol_info->persistent = master_tcon->use_persistent; + vol_info->handle_timeout = master_tcon->handle_timeout; vol_info->no_linux_ext = !master_tcon->unix_ext; + vol_info->linux_ext = master_tcon->posix_extensions; vol_info->sectype = master_tcon->ses->sectype; vol_info->sign = master_tcon->ses->sign; + vol_info->seal = master_tcon->seal; rc = cifs_set_vol_auth(vol_info, master_tcon->ses); if (rc) { @@ -5334,10 +5340,6 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) goto out; } - /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ - if (tcon->posix_extensions) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; - if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, vol_info); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ce95801e9b66..49c3ea8aa845 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2044,6 +2044,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, FILE_UNIX_BASIC_INFO *info_buf_target; unsigned int xid; int rc, tmprc; + bool new_target = d_really_is_negative(target_dentry); if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -2120,8 +2121,13 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, */ unlink_target: - /* Try unlinking the target dentry if it's not negative */ - if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) { + /* + * If the target dentry was created during the rename, try + * unlinking it if it's not negative + */ + if (new_target && + d_really_is_positive(target_dentry) && + (rc == -EACCES || rc == -EEXIST)) { if (d_is_dir(target_dentry)) tmprc = cifs_rmdir(target_dir, target_dentry); else diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index d11e31064679..84433d0653f9 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -523,7 +523,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, const int timeout, const int flags, unsigned int *instance) { - int rc; + long rc; int *credits; int optype; long int t; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2299dcc417ea..8545024a1401 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1899,7 +1899,10 @@ bool gfs2_delete_work_queued(const struct gfs2_glock *gl) static void flush_delete_work(struct gfs2_glock *gl) { - flush_delayed_work(&gl->gl_delete); + if (cancel_delayed_work(&gl->gl_delete)) { + queue_delayed_work(gfs2_delete_workqueue, + &gl->gl_delete, 0); + } gfs2_glock_queue_work(gl, 0); } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index c84887769b5a..de1d5f1d9ff8 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -531,8 +531,7 @@ static int freeze_go_sync(struct gfs2_glock *gl) int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && - test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + if (gl->gl_req == LM_ST_EXCLUSIVE && !gfs2_withdrawn(sdp)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { @@ -545,8 +544,11 @@ static int freeze_go_sync(struct gfs2_glock *gl) gfs2_assert_withdraw(sdp, 0); } queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | - GFS2_LFC_FREEZE_GO_SYNC); + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | + GFS2_LFC_FREEZE_GO_SYNC); + else /* read-only mounts */ + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } return 0; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 03ab11fab962..ca2ec02436ec 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -399,7 +399,6 @@ enum { GIF_QD_LOCKED = 1, GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, - GIF_ORDERED = 4, GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, GIF_DEFERRED_DELETE = 7, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 370c3a4b31ac..6774865f5b5b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -207,10 +207,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (no_formal_ino && ip->i_no_formal_ino && no_formal_ino != ip->i_no_formal_ino) { + error = -ESTALE; if (inode->i_state & I_NEW) goto fail; iput(inode); - return ERR_PTR(-ESTALE); + return ERR_PTR(error); } if (inode->i_state & I_NEW) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3e4734431783..a76e55bc28eb 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -613,6 +613,12 @@ static int ip_cmp(void *priv, struct list_head *a, struct list_head *b) return 0; } +static void __ordered_del_inode(struct gfs2_inode *ip) +{ + if (!list_empty(&ip->i_ordered)) + list_del_init(&ip->i_ordered); +} + static void gfs2_ordered_write(struct gfs2_sbd *sdp) { struct gfs2_inode *ip; @@ -623,8 +629,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) while (!list_empty(&sdp->sd_log_ordered)) { ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); if (ip->i_inode.i_mapping->nrpages == 0) { - test_and_clear_bit(GIF_ORDERED, &ip->i_flags); - list_del(&ip->i_ordered); + __ordered_del_inode(ip); continue; } list_move(&ip->i_ordered, &written); @@ -643,8 +648,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_ordered_lock); while (!list_empty(&sdp->sd_log_ordered)) { ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); - list_del(&ip->i_ordered); - WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags)); + __ordered_del_inode(ip); if (ip->i_inode.i_mapping->nrpages == 0) continue; spin_unlock(&sdp->sd_ordered_lock); @@ -659,8 +663,7 @@ void gfs2_ordered_del_inode(struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); spin_lock(&sdp->sd_ordered_lock); - if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) - list_del(&ip->i_ordered); + __ordered_del_inode(ip); spin_unlock(&sdp->sd_ordered_lock); } @@ -1002,6 +1005,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) out: if (gfs2_withdrawn(sdp)) { + /** + * If the tr_list is empty, we're withdrawing during a log + * flush that targets a transaction, but the transaction was + * never queued onto any of the ail lists. Here we add it to + * ail1 just so that ail_drain() will find and free it. + */ + spin_lock(&sdp->sd_ail_lock); + if (tr && list_empty(&tr->tr_list)) + list_add(&tr->tr_list, &sdp->sd_ail1_list); + spin_unlock(&sdp->sd_ail_lock); ail_drain(sdp); /* frees all transactions */ tr = NULL; } diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index c1cd6ae17659..8965c751a303 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -53,9 +53,9 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) if (gfs2_is_jdata(ip) || !gfs2_is_ordered(sdp)) return; - if (!test_bit(GIF_ORDERED, &ip->i_flags)) { + if (list_empty(&ip->i_ordered)) { spin_lock(&sdp->sd_ordered_lock); - if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags)) + if (list_empty(&ip->i_ordered)) list_add(&ip->i_ordered, &sdp->sd_log_ordered); spin_unlock(&sdp->sd_ordered_lock); } diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 733470ca6be9..c7393ee9cf68 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -39,6 +39,7 @@ static void gfs2_init_inode_once(void *foo) atomic_set(&ip->i_sizehint, 0); init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); + INIT_LIST_HEAD(&ip->i_ordered); ip->i_qadata = NULL; gfs2_holder_mark_uninitialized(&ip->i_rgd_gh); memset(&ip->i_res, 0, sizeof(ip->i_res)); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 094f5fe7c009..6d18d2c91add 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1136,7 +1136,18 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_per_node; } - if (!sb_rdonly(sb)) { + if (sb_rdonly(sb)) { + struct gfs2_holder freeze_gh; + + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT, + &freeze_gh); + if (error) { + fs_err(sdp, "can't make FS RO: %d\n", error); + goto fail_per_node; + } + gfs2_glock_dq_uninit(&freeze_gh); + } else { error = gfs2_make_fs_rw(sdp); if (error) { fs_err(sdp, "can't make FS RW: %d\n", error); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 96c345f49273..390ea79d682c 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -364,8 +364,8 @@ void gfs2_recover_func(struct work_struct *work) /* Acquire a shared hold on the freeze lock */ error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | LM_FLAG_PRIORITY, - &thaw_gh); + LM_FLAG_NOEXP | LM_FLAG_PRIORITY | + GL_EXACT, &thaw_gh); if (error) goto fail_gunlock_ji; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 32d8d26126a1..47d0ae158b69 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -167,7 +167,8 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) if (error) return error; - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT, &freeze_gh); if (error) goto fail_threads; @@ -203,7 +204,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) return 0; fail: - freeze_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&freeze_gh); fail_threads: if (sdp->sd_quotad_process) @@ -430,7 +430,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) } error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, - GL_NOCACHE, &sdp->sd_freeze_gh); + LM_FLAG_NOEXP, &sdp->sd_freeze_gh); if (error) goto out; @@ -613,13 +613,15 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { if (!log_write_allowed) { error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, GL_NOCACHE | - LM_FLAG_TRY, &freeze_gh); + LM_ST_SHARED, LM_FLAG_TRY | + LM_FLAG_NOEXP | GL_EXACT, + &freeze_gh); if (error == GLR_TRYFAILED) error = 0; } else { error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, GL_NOCACHE, + LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT, &freeze_gh); if (error && !gfs2_withdrawn(sdp)) return error; @@ -761,8 +763,8 @@ void gfs2_freeze_func(struct work_struct *work) struct super_block *sb = sdp->sd_vfs; atomic_inc(&sb->s_active); - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, - &freeze_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT, &freeze_gh); if (error) { fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error); gfs2_assert_withdraw(sdp, 0); @@ -774,8 +776,6 @@ void gfs2_freeze_func(struct work_struct *work) error); gfs2_assert_withdraw(sdp, 0); } - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) - freeze_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&freeze_gh); } deactivate_super(sb); diff --git a/fs/io_uring.c b/fs/io_uring.c index 700644a016a7..d37d7ea5ebe5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4072,14 +4072,22 @@ struct io_poll_table { int error; }; -static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb, - int notify) +static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb) { struct task_struct *tsk = req->task; - int ret; + struct io_ring_ctx *ctx = req->ctx; + int ret, notify = TWA_RESUME; - if (req->ctx->flags & IORING_SETUP_SQPOLL) + /* + * SQPOLL kernel thread doesn't need notification, just a wakeup. + * If we're not using an eventfd, then TWA_RESUME is always fine, + * as we won't have dependencies between request completions for + * other kernel wait conditions. + */ + if (ctx->flags & IORING_SETUP_SQPOLL) notify = 0; + else if (ctx->cq_ev_fd) + notify = TWA_SIGNAL; ret = task_work_add(tsk, cb, notify); if (!ret) @@ -4110,7 +4118,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, * of executing it. We can't safely execute it anyway, as we may not * have the needed state needed for it anyway. */ - ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL); + ret = io_req_task_work_add(req, &req->task_work); if (unlikely(ret)) { WRITE_ONCE(poll->canceled, true); tsk = io_wq_get_task(req->ctx->io_wq); @@ -6201,7 +6209,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (current->task_works) task_work_run(); if (signal_pending(current)) { - ret = -ERESTARTSYS; + if (current->jobctl & JOBCTL_TASK_WORK) { + spin_lock_irq(¤t->sighand->siglock); + current->jobctl &= ~JOBCTL_TASK_WORK; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + continue; + } + ret = -EINTR; break; } if (io_should_wake(&iowq, false)) @@ -6210,7 +6225,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } while (1); finish_wait(&ctx->wait, &iowq.wq); - restore_saved_sigmask_unless(ret == -ERESTARTSYS); + restore_saved_sigmask_unless(ret == -EINTR); return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 42c5128c7d1c..6c1166ccdaea 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -566,8 +566,9 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, goto out; /* don't even try if the size is too large */ - if (count > KMALLOC_MAX_SIZE) - return -ENOMEM; + error = -ENOMEM; + if (count >= KMALLOC_MAX_SIZE) + goto out; if (write) { kbuf = memdup_user_nul(ubuf, count); @@ -576,7 +577,6 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, goto out; } } else { - error = -ENOMEM; kbuf = kzalloc(count, GFP_KERNEL); if (!kbuf) goto out; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b43f0e8f43f2..9ed90368ab31 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -671,7 +671,8 @@ xlog_cil_push_work( /* * Wake up any background push waiters now this context is being pushed. */ - wake_up_all(&ctx->push_wait); + if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) + wake_up_all(&cil->xc_push_wait); /* * Check if we've anything to push. If there is nothing, then we don't @@ -743,13 +744,12 @@ xlog_cil_push_work( /* * initialise the new context and attach it to the CIL. Then attach - * the current context to the CIL committing lsit so it can be found + * the current context to the CIL committing list so it can be found * during log forces to extract the commit lsn of the sequence that * needs to be forced. */ INIT_LIST_HEAD(&new_ctx->committing); INIT_LIST_HEAD(&new_ctx->busy_extents); - init_waitqueue_head(&new_ctx->push_wait); new_ctx->sequence = ctx->sequence + 1; new_ctx->cil = cil; cil->xc_ctx = new_ctx; @@ -937,7 +937,7 @@ xlog_cil_push_background( if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); ASSERT(cil->xc_ctx->space_used < log->l_logsize); - xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock); + xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); return; } @@ -1216,12 +1216,12 @@ xlog_cil_init( INIT_LIST_HEAD(&cil->xc_committing); spin_lock_init(&cil->xc_cil_lock); spin_lock_init(&cil->xc_push_lock); + init_waitqueue_head(&cil->xc_push_wait); init_rwsem(&cil->xc_ctx_lock); init_waitqueue_head(&cil->xc_commit_wait); INIT_LIST_HEAD(&ctx->committing); INIT_LIST_HEAD(&ctx->busy_extents); - init_waitqueue_head(&ctx->push_wait); ctx->sequence = 1; ctx->cil = cil; cil->xc_ctx = ctx; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index ec22c7a3867f..75a62870b63a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -240,7 +240,6 @@ struct xfs_cil_ctx { struct xfs_log_vec *lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ - wait_queue_head_t push_wait; /* background push throttle */ struct work_struct discard_endio_work; }; @@ -274,6 +273,7 @@ struct xfs_cil { wait_queue_head_t xc_commit_wait; xfs_lsn_t xc_current_sequence; struct work_struct xc_push_work; + wait_queue_head_t xc_push_wait; /* background push throttle */ } ____cacheline_aligned_in_smp; /* |