Diffstat (limited to 'fs')
-rw-r--r-- | fs/anon_inodes.c | 23
-rw-r--r-- | fs/bcachefs/acl.c | 15
-rw-r--r-- | fs/bcachefs/alloc_background.c | 252
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 397
-rw-r--r-- | fs/bcachefs/alloc_foreground.h | 9
-rw-r--r-- | fs/bcachefs/async_objs.c | 29
-rw-r--r-- | fs/bcachefs/async_objs.h | 7
-rw-r--r-- | fs/bcachefs/async_objs_types.h | 2
-rw-r--r-- | fs/bcachefs/backpointers.c | 63
-rw-r--r-- | fs/bcachefs/bcachefs.h | 52
-rw-r--r-- | fs/bcachefs/bkey.c | 4
-rw-r--r-- | fs/bcachefs/bset.c | 74
-rw-r--r-- | fs/bcachefs/btree_cache.c | 33
-rw-r--r-- | fs/bcachefs/btree_gc.c | 89
-rw-r--r-- | fs/bcachefs/btree_io.c | 76
-rw-r--r-- | fs/bcachefs/btree_iter.c | 36
-rw-r--r-- | fs/bcachefs/btree_iter.h | 8
-rw-r--r-- | fs/bcachefs/btree_journal_iter.c | 20
-rw-r--r-- | fs/bcachefs/btree_key_cache.c | 7
-rw-r--r-- | fs/bcachefs/btree_locking.c | 17
-rw-r--r-- | fs/bcachefs/btree_node_scan.c | 31
-rw-r--r-- | fs/bcachefs/btree_trans_commit.c | 107
-rw-r--r-- | fs/bcachefs/btree_update.c | 49
-rw-r--r-- | fs/bcachefs/btree_update.h | 46
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 259
-rw-r--r-- | fs/bcachefs/btree_update_interior.h | 12
-rw-r--r-- | fs/bcachefs/btree_write_buffer.c | 45
-rw-r--r-- | fs/bcachefs/btree_write_buffer.h | 6
-rw-r--r-- | fs/bcachefs/buckets.c | 212
-rw-r--r-- | fs/bcachefs/buckets_waiting_for_journal.c | 30
-rw-r--r-- | fs/bcachefs/chardev.c | 120
-rw-r--r-- | fs/bcachefs/checksum.c | 54
-rw-r--r-- | fs/bcachefs/clock.c | 17
-rw-r--r-- | fs/bcachefs/compress.c | 29
-rw-r--r-- | fs/bcachefs/compress.h | 36
-rw-r--r-- | fs/bcachefs/data_update.c | 33
-rw-r--r-- | fs/bcachefs/debug.c | 92
-rw-r--r-- | fs/bcachefs/dirent.c | 24
-rw-r--r-- | fs/bcachefs/dirent.h | 8
-rw-r--r-- | fs/bcachefs/disk_accounting.c | 266
-rw-r--r-- | fs/bcachefs/disk_accounting.h | 9
-rw-r--r-- | fs/bcachefs/disk_groups.c | 27
-rw-r--r-- | fs/bcachefs/ec.c | 234
-rw-r--r-- | fs/bcachefs/ec.h | 2
-rw-r--r-- | fs/bcachefs/enumerated_ref.c | 4
-rw-r--r-- | fs/bcachefs/errcode.c | 3
-rw-r--r-- | fs/bcachefs/error.c | 51
-rw-r--r-- | fs/bcachefs/extents.c | 20
-rw-r--r-- | fs/bcachefs/extents.h | 1
-rw-r--r-- | fs/bcachefs/fast_list.c | 17
-rw-r--r-- | fs/bcachefs/fs-io-buffered.c | 67
-rw-r--r-- | fs/bcachefs/fs-io-direct.c | 9
-rw-r--r-- | fs/bcachefs/fs-io-pagecache.c | 53
-rw-r--r-- | fs/bcachefs/fs-io.c | 125
-rw-r--r-- | fs/bcachefs/fs-io.h | 19
-rw-r--r-- | fs/bcachefs/fs-ioctl.c | 33
-rw-r--r-- | fs/bcachefs/fs.c | 160
-rw-r--r-- | fs/bcachefs/fsck.c | 307
-rw-r--r-- | fs/bcachefs/inode.c | 39
-rw-r--r-- | fs/bcachefs/io_misc.c | 36
-rw-r--r-- | fs/bcachefs/io_read.c | 162
-rw-r--r-- | fs/bcachefs/io_read.h | 18
-rw-r--r-- | fs/bcachefs/io_write.c | 39
-rw-r--r-- | fs/bcachefs/journal.c | 244
-rw-r--r-- | fs/bcachefs/journal.h | 5
-rw-r--r-- | fs/bcachefs/journal_io.c | 177
-rw-r--r-- | fs/bcachefs/journal_reclaim.c | 226
-rw-r--r-- | fs/bcachefs/journal_seq_blacklist.c | 10
-rw-r--r-- | fs/bcachefs/logged_ops.c | 14
-rw-r--r-- | fs/bcachefs/lru.c | 16
-rw-r--r-- | fs/bcachefs/migrate.c | 21
-rw-r--r-- | fs/bcachefs/move.c | 106
-rw-r--r-- | fs/bcachefs/move.h | 2
-rw-r--r-- | fs/bcachefs/movinggc.c | 6
-rw-r--r-- | fs/bcachefs/namei.c | 24
-rw-r--r-- | fs/bcachefs/nocow_locking.c | 10
-rw-r--r-- | fs/bcachefs/opts.c | 33
-rw-r--r-- | fs/bcachefs/opts.h | 8
-rw-r--r-- | fs/bcachefs/printbuf.h | 4
-rw-r--r-- | fs/bcachefs/progress.c | 3
-rw-r--r-- | fs/bcachefs/quota.c | 96
-rw-r--r-- | fs/bcachefs/rebalance.c | 49
-rw-r--r-- | fs/bcachefs/recovery.c | 211
-rw-r--r-- | fs/bcachefs/recovery_passes.c | 68
-rw-r--r-- | fs/bcachefs/recovery_passes.h | 3
-rw-r--r-- | fs/bcachefs/reflink.c | 59
-rw-r--r-- | fs/bcachefs/replicas.c | 147
-rw-r--r-- | fs/bcachefs/sb-clean.c | 36
-rw-r--r-- | fs/bcachefs/sb-counters_format.h | 6
-rw-r--r-- | fs/bcachefs/sb-downgrade.c | 19
-rw-r--r-- | fs/bcachefs/sb-errors.c | 45
-rw-r--r-- | fs/bcachefs/sb-members.c | 19
-rw-r--r-- | fs/bcachefs/sb-members.h | 2
-rw-r--r-- | fs/bcachefs/six.c | 21
-rw-r--r-- | fs/bcachefs/snapshot.c | 98
-rw-r--r-- | fs/bcachefs/snapshot_types.h | 2
-rw-r--r-- | fs/bcachefs/str_hash.c | 17
-rw-r--r-- | fs/bcachefs/subvolume.c | 102
-rw-r--r-- | fs/bcachefs/super-io.c | 76
-rw-r--r-- | fs/bcachefs/super.c | 562
-rw-r--r-- | fs/bcachefs/sysfs.c | 28
-rw-r--r-- | fs/bcachefs/tests.c | 198
-rw-r--r-- | fs/bcachefs/thread_with_file.c | 52
-rw-r--r-- | fs/bcachefs/time_stats.c | 7
-rw-r--r-- | fs/bcachefs/trace.h | 25
-rw-r--r-- | fs/bcachefs/util.c | 8
-rw-r--r-- | fs/bcachefs/util.h | 10
-rw-r--r-- | fs/bcachefs/xattr.c | 52
-rw-r--r-- | fs/btrfs/block-group.h | 2
-rw-r--r-- | fs/btrfs/delayed-inode.c | 5
-rw-r--r-- | fs/btrfs/disk-io.c | 27
-rw-r--r-- | fs/btrfs/extent_io.c | 2
-rw-r--r-- | fs/btrfs/free-space-tree.c | 56
-rw-r--r-- | fs/btrfs/inode.c | 123
-rw-r--r-- | fs/btrfs/ioctl.c | 6
-rw-r--r-- | fs/btrfs/scrub.c | 53
-rw-r--r-- | fs/btrfs/tree-log.c | 150
-rw-r--r-- | fs/btrfs/volumes.c | 6
-rw-r--r-- | fs/btrfs/zoned.c | 86
-rw-r--r-- | fs/erofs/data.c | 21
-rw-r--r-- | fs/erofs/decompressor.c | 12
-rw-r--r-- | fs/erofs/dir.c | 6
-rw-r--r-- | fs/erofs/fileio.c | 17
-rw-r--r-- | fs/erofs/internal.h | 6
-rw-r--r-- | fs/erofs/zdata.c | 8
-rw-r--r-- | fs/erofs/zmap.c | 19
-rw-r--r-- | fs/eventpoll.c | 12
-rw-r--r-- | fs/exec.c | 9
-rw-r--r-- | fs/f2fs/file.c | 38
-rw-r--r-- | fs/f2fs/node.c | 1
-rw-r--r-- | fs/file.c | 8
-rw-r--r-- | fs/fuse/file.c | 5
-rw-r--r-- | fs/fuse/inode.c | 4
-rw-r--r-- | fs/libfs.c | 8
-rw-r--r-- | fs/namei.c | 19
-rw-r--r-- | fs/namespace.c | 115
-rw-r--r-- | fs/netfs/buffered_write.c | 38
-rw-r--r-- | fs/netfs/direct_write.c | 16
-rw-r--r-- | fs/netfs/internal.h | 26
-rw-r--r-- | fs/netfs/main.c | 6
-rw-r--r-- | fs/netfs/misc.c | 50
-rw-r--r-- | fs/netfs/read_collect.c | 16
-rw-r--r-- | fs/netfs/write_collect.c | 14
-rw-r--r-- | fs/netfs/write_retry.c | 3
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 118
-rw-r--r-- | fs/nfs/inode.c | 17
-rw-r--r-- | fs/nfs/pnfs.c | 4
-rw-r--r-- | fs/nfsd/nfs4callback.c | 1
-rw-r--r-- | fs/nfsd/nfsctl.c | 5
-rw-r--r-- | fs/overlayfs/namei.c | 10
-rw-r--r-- | fs/overlayfs/overlayfs.h | 8
-rw-r--r-- | fs/pidfs.c | 2
-rw-r--r-- | fs/pnode.h | 2
-rw-r--r-- | fs/proc/inode.c | 2
-rw-r--r-- | fs/proc/proc_sysctl.c | 18
-rw-r--r-- | fs/proc/task_mmu.c | 16
-rw-r--r-- | fs/resctrl/ctrlmondata.c | 13
-rw-r--r-- | fs/resctrl/internal.h | 4
-rw-r--r-- | fs/resctrl/monitor.c | 6
-rw-r--r-- | fs/resctrl/rdtgroup.c | 6
-rw-r--r-- | fs/smb/client/cached_dir.c | 14
-rw-r--r-- | fs/smb/client/cached_dir.h | 2
-rw-r--r-- | fs/smb/client/cifs_debug.c | 2
-rw-r--r-- | fs/smb/client/cifs_ioctl.h | 2
-rw-r--r-- | fs/smb/client/cifsglob.h | 3
-rw-r--r-- | fs/smb/client/cifsproto.h | 1
-rw-r--r-- | fs/smb/client/cifssmb.c | 22
-rw-r--r-- | fs/smb/client/connect.c | 74
-rw-r--r-- | fs/smb/client/file.c | 8
-rw-r--r-- | fs/smb/client/fs_context.c | 17
-rw-r--r-- | fs/smb/client/ioctl.c | 2
-rw-r--r-- | fs/smb/client/misc.c | 6
-rw-r--r-- | fs/smb/client/readdir.c | 2
-rw-r--r-- | fs/smb/client/reparse.c | 43
-rw-r--r-- | fs/smb/client/sess.c | 3
-rw-r--r-- | fs/smb/client/smb2pdu.c | 37
-rw-r--r-- | fs/smb/client/smbdirect.c | 166
-rw-r--r-- | fs/smb/client/trace.h | 24
-rw-r--r-- | fs/smb/server/connection.c | 2
-rw-r--r-- | fs/smb/server/connection.h | 1
-rw-r--r-- | fs/smb/server/smb2pdu.c | 103
-rw-r--r-- | fs/smb/server/transport_rdma.c | 15
-rw-r--r-- | fs/smb/server/transport_tcp.c | 3
-rw-r--r-- | fs/smb/server/vfs.c | 6
-rw-r--r-- | fs/smb/server/vfs_cache.h | 1
-rw-r--r-- | fs/super.c | 4
-rw-r--r-- | fs/xattr.c | 1
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 41
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 31
-rw-r--r-- | fs/xfs/xfs_buf.c | 38
-rw-r--r-- | fs/xfs/xfs_buf.h | 1
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 295
-rw-r--r-- | fs/xfs/xfs_buf_item.h | 3
-rw-r--r-- | fs/xfs/xfs_dquot.c | 4
-rw-r--r-- | fs/xfs/xfs_file.c | 7
-rw-r--r-- | fs/xfs/xfs_icache.c | 8
-rw-r--r-- | fs/xfs/xfs_inode.c | 2
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 5
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 4
-rw-r--r-- | fs/xfs/xfs_mru_cache.c | 19
-rw-r--r-- | fs/xfs/xfs_qm.c | 86
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 2
-rw-r--r-- | fs/xfs/xfs_super.c | 5
-rw-r--r-- | fs/xfs/xfs_trace.h | 10
-rw-r--r-- | fs/xfs/xfs_trans.c | 4
-rw-r--r-- | fs/xfs/xfs_zone_alloc.c | 42
206 files changed, 4618 insertions, 4829 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e51e7d88980a..1d847a939f29 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
-static struct inode *anon_inode_make_secure_inode(
- const char *name,
- const struct inode *context_inode)
+/**
+ * anon_inode_make_secure_inode - allocate an anonymous inode with security context
+ * @sb: [in] Superblock to allocate from
+ * @name: [in] Name of the class of the new file (e.g., "secretmem")
+ * @context_inode:
+ * [in] Optional parent inode for security inheritance
+ *
+ * The function ensures proper security initialization through the LSM hook
+ * security_inode_init_security_anon().
+ *
+ * Return: Pointer to new inode on success, ERR_PTR on failure.
+ */
+struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
+ const struct inode *context_inode)
{
struct inode *inode;
int error;
- inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+ inode = alloc_anon_inode(sb);
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
@@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode(
}
return inode;
}
+EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
static struct file *__anon_inode_getfile(const char *name,
const struct file_operations *fops,
@@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name,
return ERR_PTR(-ENOENT);
if (make_inode) {
- inode = anon_inode_make_secure_inode(name, context_inode);
+ inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb,
+ name, context_inode);
if (IS_ERR(inode)) {
file = ERR_CAST(inode);
goto err;
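
The change above threads a superblock through anon_inode_make_secure_inode() and exports the symbol for the "kvm" module. A minimal caller sketch, assuming a module that owns its own pseudo-filesystem superblock (all names below are illustrative, not part of the patch):

/* Illustrative only: my_sb would come from the module's own kern_mount(). */
static struct inode *my_secure_inode(struct super_block *my_sb)
{
	/* NULL context_inode: nothing to inherit a security context from */
	struct inode *inode = anon_inode_make_secure_inode(my_sb, "my-class", NULL);

	if (IS_ERR(inode))
		return inode;	/* allocation failure or LSM denial */

	/* the caller then sets i_fop etc. before attaching the inode to a file */
	return inode;
}
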
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index d03adc36100e..307824d6eccb 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -279,7 +279,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
if (rcu)
return ERR_PTR(-ECHILD);
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
retry:
bch2_trans_begin(trans);
@@ -304,7 +304,6 @@ err:
set_cached_acl(&inode->v, type, acl);
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return acl;
}
@@ -350,8 +349,8 @@ int bch2_set_acl(struct mnt_idmap *idmap,
umode_t mode;
int ret;
- mutex_lock(&inode->ei_update_lock);
- struct btree_trans *trans = bch2_trans_get(c);
+ guard(mutex)(&inode->ei_update_lock);
+ CLASS(btree_trans, trans)(c);
retry:
bch2_trans_begin(trans);
acl = _acl;
@@ -385,17 +384,13 @@ btree_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
- goto err;
+ return ret;
bch2_inode_update_after_write(trans, inode, &inode_u,
ATTR_CTIME|ATTR_MODE);
set_cached_acl(&inode->v, type, acl);
-err:
- bch2_trans_put(trans);
- mutex_unlock(&inode->ei_update_lock);
-
- return ret;
+ return 0;
}
int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
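
The acl.c hunks above set the template for most of the bcachefs changes below: hand-paired lock/unlock and get/put calls become the scope-based cleanup helpers from <linux/cleanup.h>, so every early `return ret` releases its resources automatically and the `err:` labels disappear. A condensed before/after sketch using this file's identifiers:

/* Before: every exit path must undo everything by hand. */
mutex_lock(&inode->ei_update_lock);
struct btree_trans *trans = bch2_trans_get(c);
/* ... errors jump to a label that runs: ... */
bch2_trans_put(trans);
mutex_unlock(&inode->ei_update_lock);

/* After: release is tied to scope exit; early returns are safe. */
guard(mutex)(&inode->ei_update_lock);	/* mutex_unlock() at scope exit */
CLASS(btree_trans, trans)(c);		/* bch2_trans_put() at scope exit */

scoped_guard(mutex, &lock) { ... }, seen in later hunks, is the block-scoped variant for when the critical section must end before the function does.
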
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index d64839c756bc..4c1604fd80f9 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -565,11 +565,11 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke
int bch2_bucket_gens_init(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
struct bkey_i_bucket_gens g;
bool have_bucket_gens_key = false;
int ret;
+ CLASS(btree_trans, trans)(c);
ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_prefetch, k, ({
/*
@@ -609,17 +609,14 @@ iter_err:
BCH_TRANS_COMMIT_no_enospc,
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
- bch2_trans_put(trans);
-
- bch_err_fn(c, ret);
return ret;
}
int bch2_alloc_read(struct bch_fs *c)
{
- down_read(&c->state_lock);
+ guard(rwsem_read)(&c->state_lock);
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bch_dev *ca = NULL;
int ret;
@@ -680,10 +677,6 @@ int bch2_alloc_read(struct bch_fs *c)
}
bch2_dev_put(ca);
- bch2_trans_put(trans);
-
- up_read(&c->state_lock);
- bch_err_fn(c, ret);
return ret;
}
@@ -699,7 +692,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
? BCH_FSCK_ERR_need_discard_key_wrong
: BCH_FSCK_ERR_freespace_key_wrong;
enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, alloc_k);
@@ -711,8 +704,6 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) ||
bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed))
ret = 0;
-
- printbuf_exit(&buf);
return ret;
}
@@ -860,10 +851,10 @@ int bch2_trigger_alloc(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
- struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
+ CLASS(bch2_dev_bucket_tryget, ca)(c, new.k->p);
if (!ca)
return bch_err_throw(c, trigger_alloc);
@@ -879,7 +870,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
ret = PTR_ERR_OR_ZERO(new_ka);
if (unlikely(ret))
- goto err;
+ return ret;
new_a = &new_ka->v;
}
@@ -913,7 +904,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
ret = bch2_bucket_do_index(trans, ca, old, old_a, false) ?:
bch2_bucket_do_index(trans, ca, new.s_c, new_a, true);
if (ret)
- goto err;
+ return ret;
}
if (new_a->data_type == BCH_DATA_cached &&
@@ -925,7 +916,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
alloc_lru_idx_read(*old_a),
alloc_lru_idx_read(*new_a));
if (ret)
- goto err;
+ return ret;
ret = bch2_lru_change(trans,
BCH_LRU_BUCKET_FRAGMENTATION,
@@ -933,17 +924,17 @@ int bch2_trigger_alloc(struct btree_trans *trans,
alloc_lru_idx_fragmentation(*old_a, ca),
alloc_lru_idx_fragmentation(*new_a, ca));
if (ret)
- goto err;
+ return ret;
if (old_a->gen != new_a->gen) {
ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
if (ret)
- goto err;
+ return ret;
}
ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags);
if (ret)
- goto err;
+ return ret;
}
if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
@@ -994,7 +985,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (bch2_fs_fatal_err_on(ret, c,
"setting bucket_needs_journal_commit: %s",
bch2_err_str(ret)))
- goto err;
+ return ret;
}
}
@@ -1036,16 +1027,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
g->gen_valid = 1;
g->gen = new_a->gen;
}
-err:
fsck_err:
- printbuf_exit(&buf);
- bch2_dev_put(ca);
return ret;
invalid_bucket:
bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
- ret = bch_err_throw(c, trigger_alloc);
- goto err;
+ return bch_err_throw(c, trigger_alloc);
}
/*
@@ -1164,10 +1151,10 @@ int bch2_check_alloc_key(struct btree_trans *trans,
const struct bch_alloc_v4 *a;
unsigned gens_offset;
struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
- struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
+ CLASS(bch2_dev_bucket_tryget_noerror, ca)(c, alloc_k.k->p);
if (fsck_err_on(!ca,
trans, alloc_key_to_missing_dev_bucket,
"alloc key for invalid device:bucket %llu:%llu",
@@ -1177,7 +1164,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
return ret;
if (!ca->mi.freespace_initialized)
- goto out;
+ return 0;
a = bch2_alloc_to_v4(alloc_k, &a_convert);
@@ -1185,35 +1172,35 @@ int bch2_check_alloc_key(struct btree_trans *trans,
k = bch2_btree_iter_peek_slot(trans, discard_iter);
ret = bkey_err(k);
if (ret)
- goto err;
+ return ret;
bool is_discarded = a->data_type == BCH_DATA_need_discard;
if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded,
trans, alloc_k, !is_discarded, true, true)) {
ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded);
if (ret)
- goto err;
+ return ret;
}
bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
k = bch2_btree_iter_peek_slot(trans, freespace_iter);
ret = bkey_err(k);
if (ret)
- goto err;
+ return ret;
bool is_free = a->data_type == BCH_DATA_free;
if (need_discard_or_freespace_err_on(!!k.k->type != is_free,
trans, alloc_k, !is_free, false, true)) {
ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free);
if (ret)
- goto err;
+ return ret;
}
bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
ret = bkey_err(k);
if (ret)
- goto err;
+ return ret;
if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
trans, bucket_gens_key_wrong,
@@ -1226,7 +1213,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
ret = PTR_ERR_OR_ZERO(g);
if (ret)
- goto err;
+ return ret;
if (k.k->type == KEY_TYPE_bucket_gens) {
bkey_reassemble(&g->k_i, k);
@@ -1239,13 +1226,9 @@ int bch2_check_alloc_key(struct btree_trans *trans,
ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0);
if (ret)
- goto err;
+ return ret;
}
-out:
-err:
fsck_err:
- bch2_dev_put(ca);
- printbuf_exit(&buf);
return ret;
}
@@ -1257,7 +1240,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
struct btree_iter *freespace_iter)
{
struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret;
if (!ca->mi.freespace_initialized)
@@ -1268,7 +1251,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
k = bch2_btree_iter_peek_slot(trans, freespace_iter);
ret = bkey_err(k);
if (ret)
- goto err;
+ return ret;
*end = bkey_min(k.k->p, *end);
@@ -1281,10 +1264,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
end->offset)) {
struct bkey_i *update =
bch2_trans_kmalloc(trans, sizeof(*update));
-
ret = PTR_ERR_OR_ZERO(update);
if (ret)
- goto err;
+ return ret;
bkey_init(&update->k);
update->k.type = KEY_TYPE_set;
@@ -1295,11 +1277,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
ret = bch2_trans_update(trans, freespace_iter, update, 0);
if (ret)
- goto err;
+ return ret;
}
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -1310,7 +1290,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
struct btree_iter *bucket_gens_iter)
{
struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
unsigned i, gens_offset, gens_end_offset;
int ret;
@@ -1319,7 +1299,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
ret = bkey_err(k);
if (ret)
- goto err;
+ return ret;
if (bkey_cmp(alloc_gens_pos(start, &gens_offset),
alloc_gens_pos(*end, &gens_end_offset)))
@@ -1345,23 +1325,20 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
if (need_update) {
struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
-
ret = PTR_ERR_OR_ZERO(u);
if (ret)
- goto err;
+ return ret;
memcpy(u, &g, sizeof(g));
ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
if (ret)
- goto err;
+ return ret;
}
}
*end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0));
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -1404,7 +1381,7 @@ int __bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_i
enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
? BCH_DATA_need_discard
: BCH_DATA_free;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool async_repair = fsck_flags & FSCK_ERR_NO_LOG;
fsck_flags |= FSCK_CAN_FIX|FSCK_CAN_IGNORE;
@@ -1456,7 +1433,6 @@ out:
fsck_err:
bch2_set_btree_iter_dontneed(trans, &alloc_iter);
bch2_trans_iter_exit(trans, &alloc_iter);
- printbuf_exit(&buf);
return ret;
delete:
if (!async_repair) {
@@ -1513,19 +1489,19 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
u64 b;
bool need_update = false;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
bkey_reassemble(&g.k_i, k);
- struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
+ CLASS(bch2_dev_tryget_noerror, ca)(c, k.k->p.inode);
if (!ca) {
if (fsck_err(trans, bucket_gens_to_invalid_dev,
"bucket_gens key for invalid device:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- ret = bch2_btree_delete_at(trans, iter, 0);
- goto out;
+ return bch2_btree_delete_at(trans, iter, 0);
+ return 0;
}
if (fsck_err_on(end <= ca->mi.first_bucket ||
@@ -1533,8 +1509,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
trans, bucket_gens_to_invalid_buckets,
"bucket_gens key for invalid buckets:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- ret = bch2_btree_delete_at(trans, iter, 0);
- goto out;
+ return bch2_btree_delete_at(trans, iter, 0);
}
for (b = start; b < ca->mi.first_bucket; b++)
@@ -1555,30 +1530,26 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
if (need_update) {
struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
-
ret = PTR_ERR_OR_ZERO(u);
if (ret)
- goto out;
+ return ret;
memcpy(u, &g, sizeof(g));
- ret = bch2_trans_update(trans, iter, u, 0);
+ return bch2_trans_update(trans, iter, u, 0);
}
-out:
fsck_err:
- bch2_dev_put(ca);
- printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_info(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
struct bch_dev *ca = NULL;
struct bkey hole;
struct bkey_s_c k;
int ret = 0;
+ CLASS(btree_trans, trans)(c);
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_prefetch);
bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
@@ -1646,14 +1617,14 @@ bkey_err:
ca = NULL;
if (ret < 0)
- goto err;
+ return ret;
ret = for_each_btree_key(trans, iter,
BTREE_ID_need_discard, POS_MIN,
BTREE_ITER_prefetch, k,
bch2_check_discard_freespace_key(trans, &iter));
if (ret)
- goto err;
+ return ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
BTREE_ITER_prefetch);
@@ -1670,11 +1641,9 @@ bkey_err:
continue;
}
if (ret) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
-
bch_err(c, "while checking %s", buf.buf);
- printbuf_exit(&buf);
break;
}
@@ -1682,16 +1651,14 @@ bkey_err:
}
bch2_trans_iter_exit(trans, &iter);
if (ret)
- goto err;
+ return ret;
ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_check_bucket_gens_key(trans, &iter, k));
-err:
- bch2_trans_put(trans);
- bch_err_fn(c, ret);
+
return ret;
}
@@ -1703,7 +1670,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
struct bkey_s_c alloc_k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret;
alloc_k = bch2_btree_iter_peek(trans, alloc_iter);
@@ -1714,7 +1681,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (ret)
return ret;
- struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode);
+ CLASS(bch2_dev_tryget_noerror, ca)(c, alloc_k.k->p.inode);
if (!ca)
return 0;
@@ -1726,95 +1693,80 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
bucket_to_u64(alloc_k.k->p),
lru_idx, alloc_k, last_flushed);
if (ret)
- goto err;
+ return ret;
}
- if (a->data_type != BCH_DATA_cached)
- goto err;
+ if (a->data_type == BCH_DATA_cached) {
+ if (fsck_err_on(!a->io_time[READ],
+ trans, alloc_key_cached_but_read_time_zero,
+ "cached bucket with read_time 0\n%s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ struct bkey_i_alloc_v4 *a_mut =
+ bch2_alloc_to_v4_mut(trans, alloc_k);
+ ret = PTR_ERR_OR_ZERO(a_mut);
+ if (ret)
+ return ret;
- if (fsck_err_on(!a->io_time[READ],
- trans, alloc_key_cached_but_read_time_zero,
- "cached bucket with read_time 0\n%s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
- struct bkey_i_alloc_v4 *a_mut =
- bch2_alloc_to_v4_mut(trans, alloc_k);
- ret = PTR_ERR_OR_ZERO(a_mut);
- if (ret)
- goto err;
+ a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
+ ret = bch2_trans_update(trans, alloc_iter,
+ &a_mut->k_i, BTREE_TRIGGER_norun);
+ if (ret)
+ return ret;
- a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
- ret = bch2_trans_update(trans, alloc_iter,
- &a_mut->k_i, BTREE_TRIGGER_norun);
- if (ret)
- goto err;
+ a = &a_mut->v;
+ }
- a = &a_mut->v;
+ ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
+ bucket_to_u64(alloc_k.k->p),
+ a->io_time[READ],
+ alloc_k, last_flushed);
}
-
- ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ],
- alloc_k, last_flushed);
- if (ret)
- goto err;
-err:
fsck_err:
- bch2_dev_put(ca);
- printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
struct bkey_buf last_flushed;
-
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
- bch2_check_stripe_to_lru_refs(c);
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)) ?:
+ bch2_check_stripe_to_lru_refs(trans);
bch2_bkey_buf_exit(&last_flushed, c);
- bch_err_fn(c, ret);
return ret;
}
static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
struct bch_fs *c = ca->fs;
- int ret;
- mutex_lock(&ca->discard_buckets_in_flight_lock);
+ guard(mutex)(&ca->discard_buckets_in_flight_lock);
struct discard_in_flight *i =
darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
- if (i) {
- ret = bch_err_throw(c, EEXIST_discard_in_flight_add);
- goto out;
- }
+ if (i)
+ return bch_err_throw(c, EEXIST_discard_in_flight_add);
- ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+ return darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
.in_progress = in_progress,
.bucket = bucket,
}));
-out:
- mutex_unlock(&ca->discard_buckets_in_flight_lock);
- return ret;
}
static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
- mutex_lock(&ca->discard_buckets_in_flight_lock);
+ guard(mutex)(&ca->discard_buckets_in_flight_lock);
struct discard_in_flight *i =
darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
BUG_ON(!i || !i->in_progress);
darray_remove_item(&ca->discard_buckets_in_flight, i);
- mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
struct discard_buckets_state {
@@ -1836,7 +1788,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
struct btree_iter iter = {};
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool discard_locked = false;
int ret = 0;
@@ -1927,7 +1879,6 @@ fsck_err:
if (!ret)
s->seen++;
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
return ret;
}
@@ -2024,17 +1975,16 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
bool got_bucket = false;
u64 bucket;
- mutex_lock(&ca->discard_buckets_in_flight_lock);
- darray_for_each(ca->discard_buckets_in_flight, i) {
- if (i->in_progress)
- continue;
+ scoped_guard(mutex, &ca->discard_buckets_in_flight_lock)
+ darray_for_each(ca->discard_buckets_in_flight, i) {
+ if (i->in_progress)
+ continue;
- got_bucket = true;
- bucket = i->bucket;
- i->in_progress = true;
- break;
- }
- mutex_unlock(&ca->discard_buckets_in_flight_lock);
+ got_bucket = true;
+ bucket = i->bucket;
+ i->in_progress = true;
+ break;
+ }
if (!got_bucket)
break;
@@ -2142,7 +2092,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
s64 *nr_to_invalidate)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
struct btree_iter alloc_iter = {};
int ret = 0;
@@ -2203,7 +2153,6 @@ static int invalidate_one_bucket(struct btree_trans *trans,
out:
fsck_err:
bch2_trans_iter_exit(trans, &alloc_iter);
- printbuf_exit(&buf);
return ret;
}
@@ -2226,7 +2175,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
{
struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
struct bch_fs *c = ca->fs;
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
int ret = 0;
struct bkey_buf last_flushed;
@@ -2268,7 +2217,6 @@ restart_err:
}
bch2_trans_iter_exit(trans, &iter);
err:
- bch2_trans_put(trans);
bch2_bkey_buf_exit(&last_flushed, c);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
@@ -2301,18 +2249,17 @@ void bch2_do_invalidates(struct bch_fs *c)
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
u64 bucket_start, u64 bucket_end)
{
- struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bkey hole;
struct bpos end = POS(ca->dev_idx, bucket_end);
- struct bch_member *m;
unsigned long last_updated = jiffies;
int ret;
BUG_ON(bucket_start > bucket_end);
BUG_ON(bucket_end > ca->mi.nbuckets);
+ CLASS(btree_trans, trans)(c);
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
BTREE_ITER_prefetch);
@@ -2383,17 +2330,16 @@ bkey_err:
}
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
if (ret < 0) {
bch_err_msg(ca, ret, "initializing free space");
return ret;
}
- mutex_lock(&c->sb_lock);
- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
- SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
+ }
return 0;
}
@@ -2403,7 +2349,6 @@ int bch2_fs_freespace_init(struct bch_fs *c)
if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image))
return 0;
-
/*
* We can crash during the device add path, so we need to check this on
* every mount:
@@ -2428,9 +2373,8 @@ int bch2_fs_freespace_init(struct bch_fs *c)
}
if (doing_init) {
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
bch_verbose(c, "done initializing freespace");
}
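
The same conversion covers printbufs throughout this file: `struct printbuf buf = PRINTBUF; ... printbuf_exit(&buf);` becomes `CLASS(printbuf, buf)();`, and the trailing bch_err_fn(c, ret) calls that the old cleanup labels carried are dropped. The printbuf class itself is presumably a DEFINE_CLASS() in fs/bcachefs/printbuf.h (touched by this diff but not shown); a sketch of the assumed shape, for illustration only:

/* Assumed definition, not visible in this diff: */
DEFINE_CLASS(printbuf, struct printbuf,
	     printbuf_exit(&_T),	/* destructor, runs at scope exit */
	     PRINTBUF,			/* constructor value */
	     void)

/* Usage, as in the hunks above -- no printbuf_exit() on any path: */
CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
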
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 5b9e64163d37..fd1415524e46 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -106,20 +106,20 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
return;
}
- spin_lock(&ob->lock);
- ob->valid = false;
- ob->data_type = 0;
- spin_unlock(&ob->lock);
+ scoped_guard(spinlock, &ob->lock) {
+ ob->valid = false;
+ ob->data_type = 0;
+ }
- spin_lock(&c->freelist_lock);
- bch2_open_bucket_hash_remove(c, ob);
+ scoped_guard(spinlock, &c->freelist_lock) {
+ bch2_open_bucket_hash_remove(c, ob);
- ob->freelist = c->open_buckets_freelist;
- c->open_buckets_freelist = ob - c->open_buckets;
+ ob->freelist = c->open_buckets_freelist;
+ c->open_buckets_freelist = ob - c->open_buckets;
- c->open_buckets_nr_free++;
- ca->nr_open_buckets--;
- spin_unlock(&c->freelist_lock);
+ c->open_buckets_nr_free++;
+ ca->nr_open_buckets--;
+ }
closure_wake_up(&c->open_buckets_wait);
}
@@ -164,14 +164,14 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
BUG_ON(c->open_buckets_partial_nr >=
ARRAY_SIZE(c->open_buckets_partial));
- spin_lock(&c->freelist_lock);
- scoped_guard(rcu)
+ scoped_guard(spinlock, &c->freelist_lock) {
+ guard(rcu)();
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
- ob->on_partial_list = true;
- c->open_buckets_partial[c->open_buckets_partial_nr++] =
- ob - c->open_buckets;
- spin_unlock(&c->freelist_lock);
+ ob->on_partial_list = true;
+ c->open_buckets_partial[c->open_buckets_partial_nr++] =
+ ob - c->open_buckets;
+ }
closure_wake_up(&c->open_buckets_wait);
closure_wake_up(&c->freelist_wait);
@@ -219,33 +219,31 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
return NULL;
}
- spin_lock(&c->freelist_lock);
+ guard(spinlock)(&c->freelist_lock);
if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
if (cl)
closure_wait(&c->open_buckets_wait, cl);
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
- spin_unlock(&c->freelist_lock);
return ERR_PTR(bch_err_throw(c, open_buckets_empty));
}
/* Recheck under lock: */
if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
- spin_unlock(&c->freelist_lock);
req->counters.skipped_open++;
return NULL;
}
struct open_bucket *ob = bch2_open_bucket_alloc(c);
- spin_lock(&ob->lock);
- ob->valid = true;
- ob->sectors_free = ca->mi.bucket_size;
- ob->dev = ca->dev_idx;
- ob->gen = gen;
- ob->bucket = bucket;
- spin_unlock(&ob->lock);
+ scoped_guard(spinlock, &ob->lock) {
+ ob->valid = true;
+ ob->sectors_free = ca->mi.bucket_size;
+ ob->dev = ca->dev_idx;
+ ob->gen = gen;
+ ob->bucket = bucket;
+ }
ca->nr_open_buckets++;
bch2_open_bucket_hash_add(c, ob);
@@ -253,7 +251,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false);
track_event_change(&c->times[BCH_TIME_blocked_allocate], false);
- spin_unlock(&c->freelist_lock);
return ob;
}
@@ -453,7 +450,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c,
struct closure *cl,
struct open_bucket *ob)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
printbuf_tabstop_push(&buf, 24);
@@ -480,8 +477,6 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c,
prt_printf(&buf, "err\t%s\n", bch2_err_str(PTR_ERR(ob)));
trace_bucket_alloc_fail(c, buf.buf);
}
-
- printbuf_exit(&buf);
}
/**
@@ -511,7 +506,8 @@ again:
bch2_dev_usage_read_fast(ca, &req->usage);
avail = dev_buckets_free(ca, req->usage, req->watermark);
- if (req->usage.buckets[BCH_DATA_need_discard] > avail)
+ if (req->usage.buckets[BCH_DATA_need_discard] >
+ min(avail, ca->mi.nbuckets >> 7))
bch2_dev_do_discards(ca);
if (req->usage.buckets[BCH_DATA_need_gc_gens] > avail)
@@ -588,7 +584,8 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
.ca = ca,
};
- bch2_trans_do(c,
+ CLASS(btree_trans, trans)(c);
+ lockrestart_do(trans,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false)));
return ob;
}
@@ -706,19 +703,13 @@ static int add_new_bucket(struct bch_fs *c,
inline int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
struct alloc_request *req,
struct dev_stripe_state *stripe,
- struct closure *_cl)
+ struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct closure *cl = NULL;
int ret = 0;
BUG_ON(req->nr_effective >= req->nr_replicas);
- /*
- * Try nonblocking first, so that if one device is full we'll try from
- * other devices:
- */
-retry_blocking:
bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc, &req->devs_sorted);
darray_for_each(req->devs_sorted, i) {
@@ -751,14 +742,6 @@ retry_blocking:
if (ret == 1)
return 0;
-
- if (ret &&
- !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
- cl != _cl) {
- cl = _cl;
- goto retry_blocking;
- }
-
if (ret)
return ret;
return bch_err_throw(c, insufficient_devices);
@@ -779,6 +762,12 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct bch_fs *c = trans->c;
int ret = 0;
+ if (req->nr_replicas < 2)
+ return 0;
+
+ if (ec_open_bucket(c, &req->ptrs))
+ return 0;
+
struct ec_stripe_head *h =
bch2_ec_stripe_head_get(trans, req, 0, cl);
if (IS_ERR(h))
@@ -855,17 +844,15 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c,
static int bucket_alloc_set_partial(struct bch_fs *c,
struct alloc_request *req)
{
- int i, ret = 0;
-
if (!c->open_buckets_partial_nr)
return 0;
- spin_lock(&c->freelist_lock);
+ guard(spinlock)(&c->freelist_lock);
if (!c->open_buckets_partial_nr)
- goto unlock;
+ return 0;
- for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
+ for (int i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
if (want_bucket(c, req, ob)) {
@@ -885,16 +872,88 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
scoped_guard(rcu)
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
- ret = add_new_bucket(c, req, ob);
+ int ret = add_new_bucket(c, req, ob);
if (ret)
- break;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int __open_bucket_add_buckets(struct btree_trans *trans,
+ struct alloc_request *req,
+ struct closure *_cl)
+{
+ struct bch_fs *c = trans->c;
+ struct open_bucket *ob;
+ struct closure *cl = NULL;
+ unsigned i;
+ int ret;
+
+ req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target);
+
+ /* Don't allocate from devices we already have pointers to: */
+ darray_for_each(*req->devs_have, i)
+ __clear_bit(*i, req->devs_may_alloc.d);
+
+ open_bucket_for_each(c, &req->ptrs, ob, i)
+ __clear_bit(ob->dev, req->devs_may_alloc.d);
+
+ ret = bucket_alloc_set_writepoint(c, req);
+ if (ret)
+ return ret;
+
+ ret = bucket_alloc_set_partial(c, req);
+ if (ret)
+ return ret;
+
+ if (req->ec) {
+ ret = bucket_alloc_from_stripe(trans, req, _cl);
+ } else {
+retry_blocking:
+ /*
+ * Try nonblocking first, so that if one device is full we'll try from
+ * other devices:
+ */
+ ret = bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe, cl);
+ if (ret &&
+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+ !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+ !cl && _cl) {
+ cl = _cl;
+ goto retry_blocking;
}
}
-unlock:
- spin_unlock(&c->freelist_lock);
+
return ret;
}
+static int open_bucket_add_buckets(struct btree_trans *trans,
+ struct alloc_request *req,
+ struct closure *cl)
+{
+ int ret;
+
+ if (req->ec && !ec_open_bucket(trans->c, &req->ptrs)) {
+ ret = __open_bucket_add_buckets(trans, req, cl);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
+ bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+ bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+ return ret;
+ if (req->nr_effective >= req->nr_replicas)
+ return 0;
+ }
+
+ bool ec = false;
+ swap(ec, req->ec);
+ ret = __open_bucket_add_buckets(trans, req, cl);
+ swap(ec, req->ec);
+
+ return ret < 0 ? ret : 0;
+}
+
/**
* should_drop_bucket - check if this open_bucket should go away
* @ob: open_bucket to predicate on
@@ -915,23 +974,18 @@ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
return ob->ec != NULL;
} else if (ca) {
bool drop = ob->dev == ca->dev_idx;
- struct open_bucket *ob2;
- unsigned i;
if (!drop && ob->ec) {
- unsigned nr_blocks;
+ guard(mutex)(&ob->ec->lock);
+ unsigned nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks;
- mutex_lock(&ob->ec->lock);
- nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks;
-
- for (i = 0; i < nr_blocks; i++) {
+ for (unsigned i = 0; i < nr_blocks; i++) {
if (!ob->ec->blocks[i])
continue;
- ob2 = c->open_buckets + ob->ec->blocks[i];
+ struct open_bucket *ob2 = c->open_buckets + ob->ec->blocks[i];
drop |= ob2->dev == ca->dev_idx;
}
- mutex_unlock(&ob->ec->lock);
}
return drop;
@@ -947,14 +1001,13 @@ static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
struct open_bucket *ob;
unsigned i;
- mutex_lock(&wp->lock);
+ guard(mutex)(&wp->lock);
open_bucket_for_each(c, &wp->ptrs, ob, i)
if (should_drop_bucket(ob, c, ca, ec))
bch2_open_bucket_put(c, ob);
else
ob_push(c, &ptrs, ob);
wp->ptrs = ptrs;
- mutex_unlock(&wp->lock);
}
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
@@ -970,39 +1023,37 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
- mutex_lock(&c->btree_reserve_cache_lock);
- while (c->btree_reserve_cache_nr) {
- struct btree_alloc *a =
- &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+ scoped_guard(mutex, &c->btree_reserve_cache_lock)
+ while (c->btree_reserve_cache_nr) {
+ struct btree_alloc *a =
+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
- bch2_open_buckets_put(c, &a->ob);
- }
- mutex_unlock(&c->btree_reserve_cache_lock);
+ bch2_open_buckets_put(c, &a->ob);
+ }
- spin_lock(&c->freelist_lock);
i = 0;
- while (i < c->open_buckets_partial_nr) {
- struct open_bucket *ob =
- c->open_buckets + c->open_buckets_partial[i];
-
- if (should_drop_bucket(ob, c, ca, ec)) {
- --c->open_buckets_partial_nr;
- swap(c->open_buckets_partial[i],
- c->open_buckets_partial[c->open_buckets_partial_nr]);
-
- ob->on_partial_list = false;
-
- scoped_guard(rcu)
- bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
-
- spin_unlock(&c->freelist_lock);
- bch2_open_bucket_put(c, ob);
- spin_lock(&c->freelist_lock);
- } else {
- i++;
+ scoped_guard(spinlock, &c->freelist_lock)
+ while (i < c->open_buckets_partial_nr) {
+ struct open_bucket *ob =
+ c->open_buckets + c->open_buckets_partial[i];
+
+ if (should_drop_bucket(ob, c, ca, ec)) {
+ --c->open_buckets_partial_nr;
+ swap(c->open_buckets_partial[i],
+ c->open_buckets_partial[c->open_buckets_partial_nr]);
+
+ ob->on_partial_list = false;
+
+ scoped_guard(rcu)
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+
+ spin_unlock(&c->freelist_lock);
+ bch2_open_bucket_put(c, ob);
+ spin_lock(&c->freelist_lock);
+ } else {
+ i++;
+ }
}
- }
- spin_unlock(&c->freelist_lock);
bch2_ec_stop_dev(c, ca);
}
@@ -1056,22 +1107,17 @@ static noinline bool try_decrease_writepoints(struct btree_trans *trans, unsigne
struct open_bucket *ob;
unsigned i;
- mutex_lock(&c->write_points_hash_lock);
- if (c->write_points_nr < old_nr) {
- mutex_unlock(&c->write_points_hash_lock);
- return true;
- }
-
- if (c->write_points_nr == 1 ||
- !too_many_writepoints(c, 8)) {
- mutex_unlock(&c->write_points_hash_lock);
- return false;
- }
+ scoped_guard(mutex, &c->write_points_hash_lock) {
+ if (c->write_points_nr < old_nr)
+ return true;
- wp = c->write_points + --c->write_points_nr;
+ if (c->write_points_nr == 1 ||
+ !too_many_writepoints(c, 8))
+ return false;
- hlist_del_rcu(&wp->node);
- mutex_unlock(&c->write_points_hash_lock);
+ wp = c->write_points + --c->write_points_nr;
+ hlist_del_rcu(&wp->node);
+ }
bch2_trans_mutex_lock_norelock(trans, &wp->lock);
open_bucket_for_each(c, &wp->ptrs, ob, i)
@@ -1174,7 +1220,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
unsigned nr_replicas_required,
enum bch_watermark watermark,
enum bch_write_flags flags,
- struct closure *_cl,
+ struct closure *cl,
struct write_point **wp_ret)
{
struct bch_fs *c = trans->c;
@@ -1190,18 +1236,15 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
erasure_code = false;
- if (nr_replicas < 2)
- erasure_code = false;
-
req->nr_replicas = nr_replicas;
req->target = target;
+ req->ec = erasure_code;
req->watermark = watermark;
req->flags = flags;
req->devs_have = devs_have;
BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
- req->ec = erasure_code;
req->ptrs.nr = 0;
req->nr_effective = 0;
req->have_cache = false;
@@ -1211,77 +1254,54 @@ retry:
req->data_type = req->wp->data_type;
- /* metadata may not allocate on cache devices: */
- if (req->data_type != BCH_DATA_user)
- req->have_cache = true;
-
- /* If we're going to fall back to the whole fs, try nonblocking first */
- struct closure *cl = req->target && !(flags & BCH_WRITE_only_specified_devs)
- ? _cl
- : NULL;
-
ret = bch2_trans_relock(trans);
if (ret)
goto err;
- while (1) {
- req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target);
-
- /* Don't allocate from devices we already have pointers to: */
- darray_for_each(*req->devs_have, i)
- __clear_bit(*i, req->devs_may_alloc.d);
-
- open_bucket_for_each(c, &req->ptrs, ob, i)
- __clear_bit(ob->dev, req->devs_may_alloc.d);
-
- ret = bucket_alloc_set_writepoint(c, req) ?:
- bucket_alloc_set_partial(c, req) ?:
- (req->ec
- ? bucket_alloc_from_stripe(trans, req, _cl)
- : bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe, cl));
+ /* metadata may not allocate on cache devices: */
+ if (req->data_type != BCH_DATA_user)
+ req->have_cache = true;
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- goto err;
+ if (target && !(flags & BCH_WRITE_only_specified_devs)) {
+ ret = open_bucket_add_buckets(trans, req, NULL);
+ if (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto alloc_done;
/* Don't retry from all devices if we're out of open buckets: */
- if (ret == -BCH_ERR_open_buckets_empty)
- goto retry_blocking;
-
- if (ret == -BCH_ERR_freelist_empty) {
- if (req->target && !(flags & BCH_WRITE_only_specified_devs))
- goto retry_all;
- goto retry_blocking;
- }
-
- if (ret == -BCH_ERR_insufficient_devices && req->target)
- goto retry_all;
-
- if (req->nr_effective < req->nr_replicas && req->ec) {
- req->ec = false;
- continue;
- }
-
- if (ret == -BCH_ERR_insufficient_devices) {
- if (req->nr_effective < nr_replicas_required)
- goto err;
- ret = 0;
+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
+ int ret2 = open_bucket_add_buckets(trans, req, cl);
+ if (!ret2 ||
+ bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
+ ret = ret2;
+ goto alloc_done;
+ }
}
- BUG_ON(ret < 0);
- break;
-retry_blocking:
- if (cl == _cl)
- goto err;
- cl = _cl;
- continue;
-retry_all:
/*
* Only try to allocate cache (durability = 0 devices) from the
* specified target:
*/
req->have_cache = true;
req->target = 0;
+
+ ret = open_bucket_add_buckets(trans, req, cl);
+ } else {
+ ret = open_bucket_add_buckets(trans, req, cl);
}
+alloc_done:
+ BUG_ON(!ret && req->nr_effective < req->nr_replicas);
+
+ if (erasure_code && !ec_open_bucket(c, &req->ptrs))
+ pr_debug("failed to get ec bucket: ret %u", ret);
+
+ if (ret == -BCH_ERR_insufficient_devices &&
+ req->nr_effective >= nr_replicas_required)
+ ret = 0;
+
+ if (ret)
+ goto err;
if (req->nr_effective > req->nr_replicas)
deallocate_extra_replicas(c, req);
@@ -1431,35 +1451,25 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca)
{
- struct open_bucket *ob;
-
- out->atomic++;
+ guard(printbuf_atomic)(out);
- for (ob = c->open_buckets;
+ for (struct open_bucket *ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
- spin_lock(&ob->lock);
+ guard(spinlock)(&ob->lock);
if (ob->valid && (!ca || ob->dev == ca->dev_idx))
bch2_open_bucket_to_text(out, c, ob);
- spin_unlock(&ob->lock);
}
-
- --out->atomic;
}
void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
{
- unsigned i;
+ guard(printbuf_atomic)(out);
+ guard(spinlock)(&c->freelist_lock);
- out->atomic++;
- spin_lock(&c->freelist_lock);
-
- for (i = 0; i < c->open_buckets_partial_nr; i++)
+ for (unsigned i = 0; i < c->open_buckets_partial_nr; i++)
bch2_open_bucket_to_text(out, c,
c->open_buckets + c->open_buckets_partial[i]);
-
- spin_unlock(&c->freelist_lock);
- --out->atomic;
}
static const char * const bch2_write_point_states[] = {
@@ -1475,7 +1485,7 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
struct open_bucket *ob;
unsigned i;
- mutex_lock(&wp->lock);
+ guard(mutex)(&wp->lock);
prt_printf(out, "%lu: ", wp->write_point);
prt_human_readable_u64(out, wp->sectors_allocated << 9);
@@ -1494,8 +1504,6 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
open_bucket_for_each(c, &wp->ptrs, ob, i)
bch2_open_bucket_to_text(out, c, ob);
printbuf_indent_sub(out, 2);
-
- mutex_unlock(&wp->lock);
}
void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
@@ -1582,7 +1590,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
c->opts.allocator_stuck_timeout);
@@ -1595,8 +1603,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
bch2_printbuf_make_room(&buf, 4096);
- buf.atomic++;
- scoped_guard(rcu)
+ scoped_guard(rcu) {
+ guard(printbuf_atomic)(&buf);
for_each_online_member_rcu(c, ca) {
prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
printbuf_indent_add(&buf, 2);
@@ -1604,7 +1612,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
}
- --buf.atomic;
+ }
prt_printf(&buf, "Copygc debug:\n");
printbuf_indent_add(&buf, 2);
@@ -1618,7 +1626,6 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
printbuf_indent_sub(&buf, 2);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
static inline unsigned allocator_wait_timeout(struct bch_fs *c)
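
Beyond the cleanup conversions, two behavioral changes in this file deserve a note. The discard kick in bch2_bucket_alloc_trans() becomes `need_discard > min(avail, ca->mi.nbuckets >> 7)` instead of `need_discard > avail`; since `>> 7` divides by 128, a device with 1,048,576 buckets gets a threshold of at most 8192 (about 0.8% of the device), so a large discard backlog now triggers background discards even while plenty of free buckets remain. And the allocation retry logic moves out of one large loop in bch2_alloc_sectors_start_trans() into __open_bucket_add_buckets()/open_bucket_add_buckets(): nonblocking allocation is attempted across devices first, the blocking closure is handed down only on retry, and a failed erasure-coded attempt falls back to a non-EC pass by temporarily clearing req->ec via swap().
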
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 1b3fc8460096..02aef66859c3 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -210,16 +210,11 @@ static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucke
static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket)
{
- bool ret;
-
if (bch2_bucket_is_open(c, dev, bucket))
return true;
- spin_lock(&c->freelist_lock);
- ret = bch2_bucket_is_open(c, dev, bucket);
- spin_unlock(&c->freelist_lock);
-
- return ret;
+ guard(spinlock)(&c->freelist_lock);
+ return bch2_bucket_is_open(c, dev, bucket);
}
enum bch_write_flags;
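
The rewritten bch2_bucket_is_open_safe() above keeps the same double-checked pattern as before, now expressible as straight-line code thanks to guard(). The general shape, reduced to its essentials (illustrative, not a new API):

/* Racy read first; take the lock only when the fast path says no. */
if (lockless_check(obj))	/* unlocked check: no lock if already true */
	return true;

guard(spinlock)(&lock);		/* released automatically on return */
return lockless_check(obj);	/* authoritative recheck under the lock */
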
diff --git a/fs/bcachefs/async_objs.c b/fs/bcachefs/async_objs.c
index a7cd1f0f0964..ad04e5f0f056 100644
--- a/fs/bcachefs/async_objs.c
+++ b/fs/bcachefs/async_objs.c
@@ -13,28 +13,38 @@
#include <linux/debugfs.h>
-static void promote_obj_to_text(struct printbuf *out, void *obj)
+static void promote_obj_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ void *obj)
{
- bch2_promote_op_to_text(out, obj);
+ bch2_promote_op_to_text(out, c, obj);
}
-static void rbio_obj_to_text(struct printbuf *out, void *obj)
+static void rbio_obj_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ void *obj)
{
- bch2_read_bio_to_text(out, obj);
+ bch2_read_bio_to_text(out, c, obj);
}
-static void write_op_obj_to_text(struct printbuf *out, void *obj)
+static void write_op_obj_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ void *obj)
{
bch2_write_op_to_text(out, obj);
}
-static void btree_read_bio_obj_to_text(struct printbuf *out, void *obj)
+static void btree_read_bio_obj_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ void *obj)
{
struct btree_read_bio *rbio = obj;
bch2_btree_read_bio_to_text(out, rbio);
}
-static void btree_write_bio_obj_to_text(struct printbuf *out, void *obj)
+static void btree_write_bio_obj_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ void *obj)
{
struct btree_write_bio *wbio = obj;
bch2_bio_to_text(out, &wbio->wbio.bio);
@@ -79,13 +89,12 @@ static ssize_t bch2_async_obj_list_read(struct file *file, char __user *buf,
if (!i->size)
break;
- list->obj_to_text(&i->buf, obj);
+ list->obj_to_text(&i->buf, i->c, obj);
+ i->iter = iter.pos;
}
if (i->buf.allocation_failure)
ret = -ENOMEM;
- else
- i->iter = iter.pos;
if (!ret)
ret = bch2_debugfs_flush_buf(i);
diff --git a/fs/bcachefs/async_objs.h b/fs/bcachefs/async_objs.h
index cd6489b8cf76..451db4c51fb2 100644
--- a/fs/bcachefs/async_objs.h
+++ b/fs/bcachefs/async_objs.h
@@ -3,9 +3,10 @@
#define _BCACHEFS_ASYNC_OBJS_H
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
-static inline void __async_object_list_del(struct fast_list *head, unsigned idx)
+static inline void __async_object_list_del(struct fast_list *head, unsigned *idx)
{
- fast_list_remove(head, idx);
+ fast_list_remove(head, *idx);
+ *idx = 0;
}
static inline int __async_object_list_add(struct fast_list *head, void *obj, unsigned *idx)
@@ -16,7 +17,7 @@ static inline int __async_object_list_add(struct fast_list *head, void *obj, uns
}
#define async_object_list_del(_c, _list, idx) \
- __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx)
+ __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, &idx)
#define async_object_list_add(_c, _list, obj, idx) \
__async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx)
diff --git a/fs/bcachefs/async_objs_types.h b/fs/bcachefs/async_objs_types.h
index 8d713c0f5841..ed262c874ad0 100644
--- a/fs/bcachefs/async_objs_types.h
+++ b/fs/bcachefs/async_objs_types.h
@@ -18,7 +18,7 @@ enum bch_async_obj_lists {
struct async_obj_list {
struct fast_list list;
- void (*obj_to_text)(struct printbuf *, void *);
+ void (*obj_to_text)(struct printbuf *, struct bch_fs *, void *);
unsigned idx;
};
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 77d93beb3c8f..bd26ab3e6812 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -108,7 +108,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
bool insert)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool will_check = c->recovery.passes_to_run &
BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
int ret = 0;
@@ -144,8 +144,8 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
if (!will_check && __bch2_inconsistent_error(c, &buf))
ret = bch_err_throw(c, erofs_unfixed_errors);
- bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
+ if (buf.buf)
+ bch_err(c, "%s", buf.buf);
return ret;
}
@@ -208,7 +208,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
bool commit)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
/*
@@ -244,7 +244,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
"%s", buf.buf)) {
ret = bch2_backpointer_del(trans, bp.k->p);
if (ret || !commit)
- goto out;
+ return ret;
/*
* Normally, on transaction commit from inside a transaction,
@@ -262,9 +262,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
*/
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}
-out:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -388,7 +386,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter = {};
struct bkey_s_c alloc_k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
struct bpos bucket;
@@ -423,7 +421,6 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st
out:
fsck_err:
bch2_trans_iter_exit(trans, &alloc_iter);
- printbuf_exit(&buf);
return ret;
}
@@ -434,14 +431,13 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_backpointers, POS_MIN, 0, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)));
+ bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed));
bch2_bkey_buf_exit(&last_flushed, c);
- bch_err_fn(c, ret);
return ret;
}
@@ -471,7 +467,7 @@ static int check_extent_checksum(struct btree_trans *trans,
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
void *data_buf = NULL;
struct bio *bio = NULL;
size_t bytes;
@@ -530,7 +526,6 @@ err:
kvfree(data_buf);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_check_extent_checksums);
- printbuf_exit(&buf);
return ret;
}
@@ -541,7 +536,7 @@ static int check_bp_exists(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_iter other_extent_iter = {};
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
if (bpos_lt(bp->k.p, s->bp_start) ||
bpos_gt(bp->k.p, s->bp_end))
@@ -566,7 +561,6 @@ err:
fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
- printbuf_exit(&buf);
return ret;
check_existing_bp:
/* Do we have a backpointer for a different extent? */
@@ -895,7 +889,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
u32 sectors[ALLOC_SECTORS_NR];
memset(sectors, 0, sizeof(sectors));
- struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
+ CLASS(bch2_dev_bucket_tryget_noerror, ca)(trans->c, alloc_k.k->p);
if (!ca)
return 0;
@@ -932,12 +926,12 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
};
bch2_trans_iter_exit(trans, &iter);
if (ret)
- goto err;
+ return ret;
if (need_commit) {
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
- goto err;
+ return ret;
}
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
@@ -946,15 +940,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
if (ret)
- goto err;
+ return ret;
}
if (sectors[ALLOC_dirty] > a->dirty_sectors ||
sectors[ALLOC_cached] > a->cached_sectors ||
sectors[ALLOC_stripe] > a->stripe_sectors) {
- ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
+ return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
bch_err_throw(c, transaction_restart_nested);
- goto err;
}
bool empty = (sectors[ALLOC_dirty] +
@@ -970,9 +963,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
*had_mismatch = true;
}
-err:
- bch2_dev_put(ca);
- return ret;
+
+ return 0;
}
static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
@@ -1107,7 +1099,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
int ret = 0;
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct extents_to_bp_state s = { .bp_start = POS_MIN };
bch2_bkey_buf_init(&s.last_flushed);
@@ -1146,7 +1138,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
if (!bpos_eq(s.bp_start, POS_MIN) ||
!bpos_eq(s.bp_end, SPOS_MAX)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "check_extents_to_backpointers(): ");
bch2_bpos_to_text(&buf, s.bp_start);
@@ -1154,7 +1146,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bch2_bpos_to_text(&buf, s.bp_end);
bch_verbose(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
ret = bch2_check_extents_to_backpointers_pass(trans, &s);
@@ -1169,11 +1160,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
}
err:
- bch2_trans_put(trans);
bch2_bkey_buf_exit(&s.last_flushed, c);
bch2_btree_cache_unpin(c);
-
- bch_err_fn(c, ret);
return ret;
}
@@ -1211,7 +1199,7 @@ int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
u64 nr = ca->bucket_backpointer_mismatch.nr;
u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
__bch2_log_msg_start(ca->name, &buf);
prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
@@ -1222,7 +1210,6 @@ int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
return 0;
}
@@ -1299,7 +1286,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
int ret;
@@ -1319,7 +1306,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
if (bbpos_cmp(start, BBPOS_MIN) ||
bbpos_cmp(end, BBPOS_MAX)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "check_backpointers_to_extents(): ");
bch2_bbpos_to_text(&buf, start);
@@ -1327,7 +1314,6 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
bch2_bbpos_to_text(&buf, end);
bch_verbose(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
@@ -1336,11 +1322,8 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
start = bbpos_successor(end);
}
- bch2_trans_put(trans);
bch2_btree_cache_unpin(c);
-
- bch_err_fn(c, ret);
return ret;
}
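
The conversions above all follow one pattern: a manually managed struct printbuf (PRINTBUF init, printbuf_exit() on every exit path) becomes a CLASS(printbuf, ...) whose destructor runs automatically at scope exit, which is what lets the out:/err: labels and their goto unwinding disappear. A minimal sketch of the pattern, assuming the class is declared in fs/bcachefs/printbuf.h roughly as follows (the exact definition may differ; log_key() is an illustrative stand-in, not a function from the tree):

#include <linux/cleanup.h>

/* Assumed declaration; ties printbuf_exit() to scope exit: */
DEFINE_CLASS(printbuf, struct printbuf,
	     printbuf_exit(&_T),	/* destructor, runs on any return */
	     PRINTBUF,			/* constructor value */
	     void)

static void log_key(struct bch_fs *c, struct bkey_s_c k)
{
	CLASS(printbuf, buf)();		/* declares struct printbuf buf */

	bch2_bkey_val_to_text(&buf, c, k);
	bch_err(c, "%s", buf.buf);
	/* no printbuf_exit(), no goto out: the compiler inserts cleanup */
}

The same mechanism explains the CLASS(bch2_dev_bucket_tryget_noerror, ca)(...) conversion: the device ref is dropped by the class destructor, so the err: label that existed only to call bch2_dev_put() goes away.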
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index fb3156ed7f0b..8a6f886b5bf2 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -329,19 +329,21 @@ do { \
bch2_print_str(_c, __VA_ARGS__); \
} while (0)
-#define bch_info(c, fmt, ...) \
- bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_info_ratelimited(c, fmt, ...) \
- bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_notice(c, fmt, ...) \
- bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_warn(c, fmt, ...) \
- bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_warn_ratelimited(c, fmt, ...) \
- bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
-
-#define bch_err(c, fmt, ...) \
- bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_log(c, loglevel, fmt, ...) \
+ bch2_print(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_log_ratelimited(c, loglevel, fmt, ...) \
+ bch2_print_ratelimited(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__)
+
+#define bch_err(c, ...) bch_log(c, KERN_ERR, __VA_ARGS__)
+#define bch_err_ratelimited(c, ...) bch_log_ratelimited(c, KERN_ERR, __VA_ARGS__)
+#define bch_warn(c, ...) bch_log(c, KERN_WARNING, __VA_ARGS__)
+#define bch_warn_ratelimited(c, ...) bch_log_ratelimited(c, KERN_WARNING, __VA_ARGS__)
+#define bch_notice(c, ...) bch_log(c, KERN_NOTICE, __VA_ARGS__)
+#define bch_info(c, ...) bch_log(c, KERN_INFO, __VA_ARGS__)
+#define bch_info_ratelimited(c, ...) bch_log_ratelimited(c, KERN_INFO, __VA_ARGS__)
+#define bch_verbose(c, ...) bch_log(c, KERN_DEBUG, __VA_ARGS__)
+#define bch_verbose_ratelimited(c, ...) bch_log_ratelimited(c, KERN_DEBUG, __VA_ARGS__)
+
#define bch_err_dev(ca, fmt, ...) \
bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
#define bch_err_dev_offset(ca, _offset, fmt, ...) \
@@ -351,8 +353,6 @@ do { \
#define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
-#define bch_err_ratelimited(c, fmt, ...) \
- bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_dev_ratelimited(ca, fmt, ...) \
bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
@@ -386,24 +386,6 @@ do { \
##__VA_ARGS__, bch2_err_str(_ret)); \
} while (0)
-#define bch_verbose(c, fmt, ...) \
-do { \
- if ((c)->opts.verbose) \
- bch_info(c, fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define bch_verbose_ratelimited(c, fmt, ...) \
-do { \
- if ((c)->opts.verbose) \
- bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define pr_verbose_init(opts, fmt, ...) \
-do { \
- if (opt_get(opts, verbose)) \
- pr_info(fmt, ##__VA_ARGS__); \
-} while (0)
-
static inline int __bch2_err_trace(struct bch_fs *c, int err)
{
trace_error_throw(c, err, _THIS_IP_);
@@ -833,6 +815,8 @@ struct bch_fs {
struct bch_disk_groups_cpu __rcu *disk_groups;
struct bch_opts opts;
+ unsigned loglevel;
+ unsigned prev_loglevel;
/* Updated by bch2_sb_update():*/
struct {
@@ -1181,7 +1165,7 @@ static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
static inline void bch2_ro_ref_put(struct bch_fs *c)
{
- if (refcount_dec_and_test(&c->ro_ref))
+ if (c && refcount_dec_and_test(&c->ro_ref))
wake_up(&c->ro_ref_wait);
}
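
The logging rework funnels every level through two macros, bch_log() and bch_log_ratelimited(), so each level-specific macro becomes a one-liner. Expansion of a call, for illustration (the message text is made up):

	bch_warn_ratelimited(c, "bucket %llu stuck", b);
	/* -> bch_log_ratelimited(c, KERN_WARNING, "bucket %llu stuck", b)
	 * -> bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, "bucket %llu stuck"), b)
	 */

Note the behavioral change: bch_verbose() previously compiled to an explicit if ((c)->opts.verbose) check around bch_info(), printing at KERN_INFO; it now always emits at KERN_DEBUG, with the new loglevel/prev_loglevel fields in struct bch_fs presumably doing the runtime filtering inside bch2_print() (an assumption; the filter itself is outside this hunk).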
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index ee823c640642..67e39f835b96 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -624,10 +624,8 @@ struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
}
if (static_branch_unlikely(&bch2_debug_check_bkey_unpack)) {
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf));
- printbuf_exit(&buf);
}
return ret;
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 32841f762eb2..72698c0d9f0e 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -58,7 +58,7 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
struct bkey_packed *_k, *_n;
struct bkey uk, n;
struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
if (!i->u64s)
return;
@@ -97,8 +97,6 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p))
printk(KERN_ERR "Duplicate keys\n");
}
-
- printbuf_exit(&buf);
}
void bch2_dump_btree_node(struct bch_fs *c, struct btree *b)
@@ -113,7 +111,7 @@ void bch2_dump_btree_node_iter(struct btree *b,
struct btree_node_iter *iter)
{
struct btree_node_iter_set *set;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
printk(KERN_ERR "btree node iter with %u/%u sets:\n",
__btree_node_iter_used(iter), b->nsets);
@@ -128,8 +126,6 @@ void bch2_dump_btree_node_iter(struct btree *b,
printk(KERN_ERR "set %zu key %u: %s\n",
t - b->set, set->k, buf.buf);
}
-
- printbuf_exit(&buf);
}
struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
@@ -362,27 +358,6 @@ static struct bkey_float *bkey_float(const struct btree *b,
return ro_aux_tree_base(b, t)->f + idx;
}
-static void __bset_aux_tree_verify(struct btree *b)
-{
- for_each_bset(b, t) {
- if (t->aux_data_offset == U16_MAX)
- continue;
-
- BUG_ON(t != b->set &&
- t[-1].aux_data_offset == U16_MAX);
-
- BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
- BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
- BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
- }
-}
-
-static inline void bset_aux_tree_verify(struct btree *b)
-{
- if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
- __bset_aux_tree_verify(b);
-}
-
void bch2_btree_keys_init(struct btree *b)
{
unsigned i;
@@ -538,6 +513,51 @@ static inline void bch2_bset_verify_rw_aux_tree(struct btree *b,
__bch2_bset_verify_rw_aux_tree(b, t);
}
+static void __bset_aux_tree_verify_ro(struct btree *b, struct bset_tree *t)
+{
+ struct bkey_packed *k = btree_bkey_first(b, t);
+
+ eytzinger1_for_each(j, t->size - 1) {
+ while (tree_to_bkey(b, t, j) > k &&
+ k != btree_bkey_last(b, t))
+ k = bkey_p_next(k);
+
+ BUG_ON(tree_to_bkey(b, t, j) != k);
+ }
+}
+
+static void __bset_aux_tree_verify(struct btree *b)
+{
+ for_each_bset(b, t) {
+ if (t->aux_data_offset == U16_MAX)
+ continue;
+
+ BUG_ON(t != b->set &&
+ t[-1].aux_data_offset == U16_MAX);
+
+ BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
+ BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
+ BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
+
+ switch (bset_aux_tree_type(t)) {
+ case BSET_RO_AUX_TREE:
+ __bset_aux_tree_verify_ro(b, t);
+ break;
+ case BSET_RW_AUX_TREE:
+ __bch2_bset_verify_rw_aux_tree(b, t);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static inline void bset_aux_tree_verify(struct btree *b)
+{
+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
+ __bset_aux_tree_verify(b);
+}
+
/* returns idx of first entry >= offset: */
static unsigned rw_aux_tree_bsearch(struct btree *b,
struct bset_tree *t,
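
The verification change here is not just code motion: __bset_aux_tree_verify() now dispatches on the aux tree type, and the new __bset_aux_tree_verify_ro() cross-checks every cached key offset in the read-only eytzinger lookup tree against a linear walk of the bset. As a refresher on the layout being verified (a sketch; the real helpers live in fs/bcachefs/eytzinger.h):

/* 1-indexed eytzinger array: an implicit binary search tree stored
 * breadth-first, so descent is cache-friendly while an in-order walk
 * (what eytzinger1_for_each() yields) still visits nodes in key order. */
static inline unsigned eytz1_left_child(unsigned i)  { return 2 * i; }
static inline unsigned eytz1_right_child(unsigned i) { return 2 * i + 1; }

Because the in-order walk is monotonic in key position, the verifier can advance k with bkey_p_next() and never needs to rewind.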
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 49505653fe12..23ed7393f07f 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -78,9 +78,8 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
struct btree_cache *bc = &c->btree_cache;
- mutex_lock(&bc->lock);
- __bch2_btree_node_to_freelist(bc, b);
- mutex_unlock(&bc->lock);
+ scoped_guard(mutex, &bc->lock)
+ __bch2_btree_node_to_freelist(bc, b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@@ -215,14 +214,13 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
{
struct btree_cache *bc = &c->btree_cache;
- mutex_lock(&bc->lock);
+ guard(mutex)(&bc->lock);
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
set_btree_node_pinned(b);
list_move(&b->list, &bc->live[1].list);
bc->live[0].nr--;
bc->live[1].nr++;
}
- mutex_unlock(&bc->lock);
}
void bch2_btree_cache_unpin(struct bch_fs *c)
@@ -230,7 +228,7 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
struct btree_cache *bc = &c->btree_cache;
struct btree *b, *n;
- mutex_lock(&bc->lock);
+ guard(mutex)(&bc->lock);
c->btree_cache.pinned_nodes_mask[0] = 0;
c->btree_cache.pinned_nodes_mask[1] = 0;
@@ -240,8 +238,6 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
bc->live[0].nr++;
bc->live[1].nr--;
}
-
- mutex_unlock(&bc->lock);
}
/* Btree in memory cache - hash table */
@@ -296,11 +292,8 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
b->c.level = level;
b->c.btree_id = id;
- mutex_lock(&bc->lock);
- int ret = __bch2_btree_node_hash_insert(bc, b);
- mutex_unlock(&bc->lock);
-
- return ret;
+ guard(mutex)(&bc->lock);
+ return __bch2_btree_node_hash_insert(bc, b);
}
void bch2_btree_node_update_key_early(struct btree_trans *trans,
@@ -317,7 +310,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
if (!IS_ERR_OR_NULL(b)) {
- mutex_lock(&c->btree_cache.lock);
+ guard(mutex)(&c->btree_cache.lock);
__bch2_btree_node_hash_remove(&c->btree_cache, b);
@@ -325,7 +318,6 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
BUG_ON(ret);
- mutex_unlock(&c->btree_cache.lock);
six_unlock_read(&b->c.lock);
}
@@ -930,20 +922,18 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
}
if (unlikely(!bkey_is_btree_ptr(&k->k))) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
- printbuf_exit(&buf);
return ERR_PTR(ret);
}
if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
- printbuf_exit(&buf);
return ERR_PTR(ret);
}
@@ -1018,11 +1008,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
{
- struct printbuf buf = PRINTBUF;
-
if (c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations)
return;
+ CLASS(printbuf, buf)();
prt_printf(&buf,
"btree node header doesn't match ptr: ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
@@ -1038,8 +1027,6 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
bch2_bpos_to_text(&buf, b->data->max_key);
bch2_fs_topology_error(c, "%s", buf.buf);
-
- printbuf_exit(&buf);
}
static inline void btree_check_header(struct bch_fs *c, struct btree *b)
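
Two flavors of the cleanup.h lock guards show up in this file and recur through the rest of the series: guard(mutex)(&lock) holds the mutex for the remainder of the enclosing scope, while scoped_guard(mutex, &lock) { ... } confines it to one block. The distinction matters in bch2_btree_node_to_freelist(), where the six locks must be released after the btree_cache mutex is dropped. A sketch of both (the __*_locked helpers are hypothetical stand-ins):

#include <linux/cleanup.h>
#include <linux/mutex.h>

static int count_nodes(struct btree_cache *bc)
{
	guard(mutex)(&bc->lock);		/* held until function return */
	return __count_nodes_locked(bc);	/* hypothetical */
}

static void add_to_freelist(struct btree_cache *bc, struct btree *b)
{
	scoped_guard(mutex, &bc->lock)		/* held for this block only */
		__freelist_add_locked(bc, b);	/* hypothetical */
	/* mutex released; now safe to take other locks */
}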
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 7269490a5d9a..34cb8a4324dc 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -95,11 +95,10 @@ static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
- preempt_disable();
+ guard(preempt)();
write_seqcount_begin(&c->gc_pos_lock);
c->gc_pos = new_pos;
write_seqcount_end(&c->gc_pos_lock);
- preempt_enable();
}
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
@@ -138,14 +137,13 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
int ret;
if (c->opts.verbose) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
prt_str(&buf, " -> ");
bch2_bpos_to_text(&buf, new_min);
bch_info(c, "%s(): %s", __func__, buf.buf);
- printbuf_exit(&buf);
}
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
@@ -174,14 +172,13 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
int ret;
if (c->opts.verbose) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
prt_str(&buf, " -> ");
bch2_bpos_to_text(&buf, new_max);
bch_info(c, "%s(): %s", __func__, buf.buf);
- printbuf_exit(&buf);
}
ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
@@ -205,13 +202,12 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
bch2_btree_node_drop_keys_outside_node(b);
- mutex_lock(&c->btree_cache.lock);
+ guard(mutex)(&c->btree_cache.lock);
__bch2_btree_node_hash_remove(&c->btree_cache, b);
bkey_copy(&b->key, &new->k_i);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
BUG_ON(ret);
- mutex_unlock(&c->btree_cache.lock);
return 0;
}
@@ -223,7 +219,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
struct bpos expected_start = !prev
? b->data->min_key
: bpos_successor(prev->key.k.p);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
@@ -253,7 +249,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
expected_start,
bpos_predecessor(cur->data->min_key));
if (ret)
- goto err;
+ return ret;
*pulled_from_scan = cur->data->min_key;
ret = DID_FILL_FROM_SCAN;
@@ -286,9 +282,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
}
}
}
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -296,7 +290,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
struct btree *child, struct bpos *pulled_from_scan)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
if (bpos_eq(child->key.k.p, b->key.k.p))
@@ -317,7 +311,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
bpos_successor(child->key.k.p), b->key.k.p);
if (ret)
- goto err;
+ return ret;
*pulled_from_scan = b->key.k.p;
ret = DID_FILL_FROM_SCAN;
@@ -325,9 +319,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
ret = set_node_max(c, child, b->key.k.p);
}
}
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -340,7 +332,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
struct bkey_buf prev_k, cur_k;
struct btree *prev = NULL, *cur = NULL;
bool have_child, new_pass = false;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
if (!b->c.level)
@@ -529,7 +521,6 @@ fsck_err:
bch2_bkey_buf_exit(&prev_k, c);
bch2_bkey_buf_exit(&cur_k, c);
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -539,7 +530,7 @@ static int bch2_check_root(struct btree_trans *trans, enum btree_id btree,
{
struct bch_fs *c = trans->c;
struct btree_root *r = bch2_btree_id_root(c, btree);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
bch2_btree_id_to_text(&buf, btree);
@@ -568,21 +559,20 @@ static int bch2_check_root(struct btree_trans *trans, enum btree_id btree,
bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX);
if (ret)
- goto err;
+ return ret;
}
*reconstructed_root = true;
}
err:
fsck_err:
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
int bch2_check_topology(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bpos pulled_from_scan = POS_MIN;
int ret = 0;
@@ -603,9 +593,8 @@ recover:
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
- mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
- mutex_unlock(&c->btree_cache.lock);
+ scoped_guard(mutex, &c->btree_cache.lock)
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
r->b = NULL;
@@ -614,17 +603,15 @@ recover:
goto recover;
}
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_btree_id_to_text(&buf, i);
bch_err(c, "empty btree root %s", buf.buf);
- printbuf_exit(&buf);
bch2_btree_root_alloc_fake_trans(trans, i, 0);
r->alive = false;
ret = 0;
}
}
- bch2_trans_put(trans);
return ret;
}
@@ -651,7 +638,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
deleted.p = k.k->p;
@@ -675,10 +662,9 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
bch2_dev_btree_bitmap_mark(c, k);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
/*
@@ -703,7 +689,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
BTREE_TRIGGER_gc|BTREE_TRIGGER_insert|flags);
out:
fsck_err:
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -771,8 +756,8 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
static int bch2_gc_btrees(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
- struct printbuf buf = PRINTBUF;
+ CLASS(btree_trans, trans)(c);
+ CLASS(printbuf, buf)();
int ret = 0;
struct progress_indicator_state progress;
@@ -792,8 +777,6 @@ static int bch2_gc_btrees(struct bch_fs *c)
ret = bch2_gc_btree(trans, &progress, btree, true);
}
- printbuf_exit(&buf);
- bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
}
@@ -945,16 +928,16 @@ fsck_err:
static int bch2_gc_alloc_done(struct bch_fs *c)
{
+ CLASS(btree_trans, trans)(c);
int ret = 0;
for_each_member_device(c, ca) {
- ret = bch2_trans_run(c,
- for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc,
+ ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
POS(ca->dev_idx, ca->mi.nbuckets - 1),
BTREE_ITER_slots|BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_alloc_write_key(trans, &iter, ca, k)));
+ bch2_alloc_write_key(trans, &iter, ca, k));
if (ret) {
bch2_dev_put(ca);
break;
@@ -987,7 +970,7 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
const struct bch_stripe *s;
struct gc_stripe *m;
bool bad = false;
@@ -1032,18 +1015,17 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans,
ret = bch2_trans_update(trans, iter, &new->k_i, 0);
}
fsck_err:
- printbuf_exit(&buf);
return ret;
}
static int bch2_gc_stripes_done(struct bch_fs *c)
{
- return bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter,
BTREE_ID_stripes, POS_MIN,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_gc_write_stripes_key(trans, &iter, k)));
+ bch2_gc_write_stripes_key(trans, &iter, k));
}
/**
@@ -1072,8 +1054,8 @@ int bch2_check_allocations(struct bch_fs *c)
{
int ret;
- down_read(&c->state_lock);
- down_write(&c->gc_lock);
+ guard(rwsem_read)(&c->state_lock);
+ guard(rwsem_write)(&c->gc_lock);
bch2_btree_interior_updates_flush(c);
@@ -1102,15 +1084,11 @@ int bch2_check_allocations(struct bch_fs *c)
bch2_gc_stripes_done(c) ?:
bch2_gc_reflink_done(c);
out:
- percpu_down_write(&c->mark_lock);
- /* Indicates that gc is no longer in progress: */
- __gc_pos_set(c, gc_phase(GC_PHASE_not_running));
-
- bch2_gc_free(c);
- percpu_up_write(&c->mark_lock);
-
- up_write(&c->gc_lock);
- up_read(&c->state_lock);
+ scoped_guard(percpu_write, &c->mark_lock) {
+ /* Indicates that gc is no longer in progress: */
+ __gc_pos_set(c, gc_phase(GC_PHASE_not_running));
+ bch2_gc_free(c);
+ }
/*
* At startup, allocations can happen directly instead of via the
@@ -1121,7 +1099,6 @@ out:
if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
bch2_sb_members_clean_deleted(c);
- bch_err_fn(c, ret);
return ret;
}
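
bch2_check_allocations() leans on guard classes for three different lock types: rwsem_read/rwsem_write (shipped in <linux/rwsem.h>), preempt (from <linux/preempt.h>, used in __gc_pos_set()), and percpu_write for the mark_lock. If the percpu-rwsem guards aren't already provided by the tree, they would be declared along these lines (an assumption, shown only to make the scoped_guard(percpu_write, ...) usage concrete):

#include <linux/cleanup.h>
#include <linux/percpu-rwsem.h>

DEFINE_GUARD(percpu_read, struct percpu_rw_semaphore *,
	     percpu_down_read(_T), percpu_up_read(_T))
DEFINE_GUARD(percpu_write, struct percpu_rw_semaphore *,
	     percpu_down_write(_T), percpu_up_write(_T))

One consequence of converting the function prologue to guards: the out: path no longer unwinds the two rwsems by hand, so the exit paths shrink to plain returns.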
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index b30799e494eb..bd86dd7151a1 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -24,6 +24,7 @@
#include "super-io.h"
#include "trace.h"
+#include <linux/moduleparam.h>
#include <linux/sched/mm.h>
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -591,7 +592,7 @@ static int __btree_err(int ret,
!(test_bit(BCH_FS_in_fsck, &c->flags) &&
c->opts.fix_errors == FSCK_FIX_ask);
- struct printbuf out = PRINTBUF;
+ CLASS(printbuf, out)();
bch2_log_msg_start(c, &out);
if (!print_deferred)
@@ -618,13 +619,13 @@ static int __btree_err(int ret,
if (!have_retry)
ret = bch_err_throw(c, fsck_fix);
- goto out;
+ return ret;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
break;
}
- goto out;
+ return ret;
}
if (rw == WRITE) {
@@ -646,16 +647,14 @@ static int __btree_err(int ret,
if (!have_retry)
ret = bch_err_throw(c, fsck_fix);
- goto out;
+ return ret;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
break;
}
print:
bch2_print_str(c, KERN_ERR, out.buf);
-out:
fsck_err:
- printbuf_exit(&out);
return ret;
}
@@ -734,8 +733,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
+ CLASS(printbuf, buf1)();
+ CLASS(printbuf, buf2)();
int ret = 0;
btree_err_on(!bch2_version_compatible(version),
@@ -754,10 +753,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
"bset version %u older than superblock version_min %u",
version, c->sb.version_min)) {
if (bch2_version_compatible(version)) {
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
c->disk_sb.sb->version_min = cpu_to_le16(version);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
} else {
/* We have no idea what's going on: */
i->version = cpu_to_le16(c->sb.version);
@@ -771,10 +769,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_node_bset_newer_than_sb,
"bset version %u newer than superblock version %u",
version, c->sb.version)) {
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
c->disk_sb.sb->version = cpu_to_le16(version);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
@@ -874,8 +871,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
&bn->format);
}
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
return ret;
}
@@ -945,7 +940,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
{
unsigned version = le16_to_cpu(i->version);
struct bkey_packed *k, *prev = NULL;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
int ret = 0;
@@ -1050,7 +1045,6 @@ got_good_key:
set_btree_node_need_rewrite_error(b);
}
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -1069,7 +1063,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
u64 max_journal_seq = 0;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0, write = READ;
u64 start_time = local_clock();
@@ -1301,9 +1295,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);
- if (updated_range)
- bch2_btree_node_drop_keys_outside_node(b);
-
i = &b->data->keys;
for (k = i->start; k != vstruct_last(i);) {
struct bkey tmp;
@@ -1341,6 +1332,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
+ if (updated_range)
+ bch2_btree_node_drop_keys_outside_node(b);
+
/*
* XXX:
*
@@ -1384,7 +1378,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
}
fsck_err:
mempool_free(iter, &c->fill_iter);
- printbuf_exit(&buf);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
return ret;
}
@@ -1400,7 +1393,7 @@ static void btree_node_read_work(struct work_struct *work)
struct bch_io_failures failed = { .nr = 0 };
int ret = 0;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "btree node read error at btree ");
@@ -1492,7 +1485,6 @@ start:
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
rb->start_time);
bio_put(&rb->bio);
- printbuf_exit(&buf);
clear_btree_node_read_in_flight(b);
smp_mb__after_atomic();
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
@@ -1574,7 +1566,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
closure_type(ra, struct btree_node_read_all, cl);
struct bch_fs *c = ra->c;
struct btree *b = ra->b;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool dump_bset_maps = false;
int ret = 0, best = -1, write = READ;
unsigned i, written = 0, written2 = 0;
@@ -1683,11 +1675,10 @@ fsck_err:
if (ret) {
set_btree_node_read_error(b);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_btree_lost_data(c, &buf, b->c.btree_id);
if (buf.pos)
bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
} else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
@@ -1698,7 +1689,6 @@ fsck_err:
closure_debug_destroy(&ra->cl);
kfree(ra);
- printbuf_exit(&buf);
clear_btree_node_read_in_flight(b);
smp_mb__after_atomic();
@@ -1818,7 +1808,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
if (ret <= 0) {
bool ratelimit = true;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_str(&buf, "btree node read error: no device to read from\n at ");
@@ -1835,7 +1825,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
DEFAULT_RATELIMIT_BURST);
if (!ratelimit || __ratelimit(&rs))
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
set_btree_node_read_error(b);
clear_btree_node_read_in_flight(b);
@@ -1917,9 +1906,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
bch2_btree_node_read(trans, b, true);
if (btree_node_read_error(b)) {
- mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
- mutex_unlock(&c->btree_cache.lock);
+ scoped_guard(mutex, &c->btree_cache.lock)
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
ret = bch_err_throw(c, btree_node_read_error);
goto err;
@@ -1936,7 +1924,8 @@ err:
int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
const struct bkey_i *k, unsigned level)
{
- return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
+ CLASS(btree_trans, trans)(c);
+ return __bch2_btree_root_read(trans, id, k, level);
}
struct btree_node_scrub {
@@ -2015,7 +2004,7 @@ static void btree_node_scrub_work(struct work_struct *work)
{
struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work);
struct bch_fs *c = scrub->c;
- struct printbuf err = PRINTBUF;
+ CLASS(printbuf, err)();
__bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level,
bkey_i_to_s_c(scrub->key.k));
@@ -2030,7 +2019,6 @@ static void btree_node_scrub_work(struct work_struct *work)
bch_err_fn_ratelimited(c, ret);
}
- printbuf_exit(&err);
	bch2_bkey_buf_exit(&scrub->key, c);

btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub);
@@ -2211,7 +2199,8 @@ static void btree_node_write_work(struct work_struct *work)
}
} else {
- ret = bch2_trans_do(c,
+ CLASS(btree_trans, trans)(c);
+ ret = lockrestart_do(trans,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|
@@ -2230,11 +2219,10 @@ err:
set_btree_node_noevict(b);
if (!bch2_err_matches(ret, EROFS)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
bch2_btree_pos_to_text(&buf, c, b);
bch2_fs_fatal_error(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
goto out;
}
@@ -2253,13 +2241,12 @@ static void btree_node_write_endio(struct bio *bio)
wbio->submit_time, !bio->bi_status);
if (ca && bio->bi_status) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
+ CLASS(printbuf, buf)();
+ guard(printbuf_atomic)(&buf);
prt_printf(&buf, "btree write error: %s\n ",
bch2_blk_status_to_str(bio->bi_status));
bch2_btree_pos_to_text(&buf, c, b);
bch_err_dev_ratelimited(ca, "%s", buf.buf);
- printbuf_exit(&buf);
}
if (bio->bi_status) {
@@ -2553,9 +2540,14 @@ do_write:
}
count_event(c, btree_node_write);
+ /*
+ * blk-wbt.c throttles all writes except those that have both REQ_SYNC
+ * and REQ_IDLE set...
+ */
+
wbio = container_of(bio_alloc_bioset(NULL,
buf_pages(data, sectors_to_write << 9),
- REQ_OP_WRITE|REQ_META,
+ REQ_OP_WRITE|REQ_META|REQ_SYNC|REQ_IDLE,
GFP_NOFS,
&c->btree_bio),
struct btree_write_bio, wbio.bio);
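
The REQ_SYNC|REQ_IDLE addition is the one functional change in this hunk: blk-wbt only exempts a write from writeback throttling when both flags are set together, which is what the new comment alludes to. Spelled out (illustrative):

blk_opf_t opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
/* REQ_SYNC alone is still tracked by blk-wbt; pairing it with
 * REQ_IDLE is what makes the throttle pass the bio through in
 * current kernels, keeping btree node writes off the writeback
 * throttle entirely. */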
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 7463946898c0..cc771affa511 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -903,7 +903,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
k = bch2_btree_and_journal_iter_peek(&jiter);
if (!k.k) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "node not found at pos ");
bch2_bpos_to_text(&buf, path->pos);
@@ -911,7 +911,6 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
bch2_btree_pos_to_text(&buf, c, l->b);
ret = bch2_fs_topology_error(c, "%s", buf.buf);
- printbuf_exit(&buf);
goto err;
}
@@ -930,7 +929,7 @@ static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "node not found at pos ");
bch2_bpos_to_text(&buf, path->pos);
@@ -1451,7 +1450,7 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_
static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
{
#ifdef CONFIG_BCACHEFS_DEBUG
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_prt_backtrace(&buf, &trans->last_restarted_trace);
panic("in transaction restart: %s, last restarted by\n%s",
bch2_err_str(trans->restarted),
@@ -1601,13 +1600,13 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
static noinline __cold
void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
+ bch2_log_msg_start(trans->c, &buf);
__bch2_trans_paths_to_text(&buf, trans, nosort);
bch2_trans_updates_to_text(&buf, trans);
bch2_print_str(trans->c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
noinline __cold
@@ -1620,22 +1619,19 @@ noinline __cold
static void bch2_trans_update_max_paths(struct btree_trans *trans)
{
struct btree_transaction_stats *s = btree_trans_stats(trans);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths);
bch2_trans_paths_to_text(&buf, trans);
if (!buf.allocation_failure) {
- mutex_lock(&s->lock);
+ guard(mutex)(&s->lock);
if (nr > s->nr_max_paths) {
s->nr_max_paths = nr;
swap(s->max_paths_text, buf.buf);
}
- mutex_unlock(&s->lock);
}
- printbuf_exit(&buf);
-
trans->nr_paths_max = nr;
}
@@ -1643,11 +1639,10 @@ noinline __cold
int __bch2_btree_trans_too_many_iters(struct btree_trans *trans)
{
if (trace_trans_restart_too_many_iters_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_trans_paths_to_text(&buf, trans);
trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf);
- printbuf_exit(&buf);
}
count_event(trans->c, trans_restart_too_many_iters);
@@ -3196,14 +3191,13 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n",
BTREE_TRANS_MEM_MAX);
bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
#endif
}
@@ -3213,7 +3207,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (new_bytes > s->max_mem) {
- mutex_lock(&s->lock);
+ guard(mutex)(&s->lock);
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
@@ -3225,7 +3219,6 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
s->trans_kmalloc_trace.nr);
#endif
s->max_mem = new_bytes;
- mutex_unlock(&s->lock);
}
if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) {
@@ -3535,7 +3528,7 @@ static void check_btree_paths_leaked(struct btree_trans *trans)
struct btree_path *path;
unsigned i;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn);
@@ -3547,7 +3540,6 @@ static void check_btree_paths_leaked(struct btree_trans *trans)
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
}
#else
@@ -3672,11 +3664,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
/* trans->paths is rcu protected vs. freeing */
guard(rcu)();
- out->atomic++;
+ guard(printbuf_atomic)(out);
struct btree_path *paths = rcu_dereference(trans->paths);
if (!paths)
- goto out;
+ return;
unsigned long *paths_allocated = trans_paths_allocated(paths);
@@ -3712,8 +3704,6 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
bch2_btree_bkey_cached_common_to_text(out, b);
prt_newline(out);
}
-out:
- --out->atomic;
}
void bch2_fs_btree_iter_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index cc2c6bb6b6a8..53074ed62e09 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -1007,13 +1007,19 @@ static inline void class_btree_trans_destructor(struct btree_trans **p)
#define class_btree_trans_constructor(_c) bch2_trans_get(_c)
+/* deprecated, prefer CLASS(btree_trans) */
#define bch2_trans_run(_c, _do) \
({ \
CLASS(btree_trans, trans)(_c); \
(_do); \
})
-#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do))
+/* deprecated, prefer CLASS(btree_trans) */
+#define bch2_trans_do(_c, _do) \
+({ \
+ CLASS(btree_trans, trans)(_c); \
+ lockrestart_do(trans, _do); \
+})
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
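
Usage contrast for the newly deprecated wrappers, with bch2_do_thing() as a hypothetical transaction body:

/* Before (deprecated): */
ret = bch2_trans_run(c, bch2_do_thing(trans, arg));

/* After: transaction lifetime is just a scope */
CLASS(btree_trans, trans)(c);
ret = lockrestart_do(trans, bch2_do_thing(trans, arg));
/* bch2_trans_put() runs automatically at scope exit */

The macros remain as thin shims over CLASS(btree_trans, ...), so existing callers keep working while conversions land file by file.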
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index ea839560a136..24f2fbe84ad7 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -5,6 +5,7 @@
#include "bset.h"
#include "btree_cache.h"
#include "btree_journal_iter.h"
+#include "disk_accounting.h"
#include "journal_io.h"
#include <linux/sort.h>
@@ -278,12 +279,23 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
if (idx < keys->size &&
journal_key_cmp(&n, &keys->data[idx]) == 0) {
+ struct bkey_i *o = keys->data[idx].k;
+
+ if (k->k.type == KEY_TYPE_accounting &&
+ o->k.type == KEY_TYPE_accounting) {
+ if (!keys->data[idx].allocated)
+ goto insert;
+
+ bch2_accounting_accumulate(bkey_i_to_accounting(k),
+ bkey_i_to_s_c_accounting(o));
+ }
+
if (keys->data[idx].allocated)
kfree(keys->data[idx].k);
keys->data[idx] = n;
return 0;
}
-
+insert:
if (idx > keys->gap)
idx -= keys->size - keys->nr;
@@ -450,9 +462,8 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
keys->data[idx].level == level &&
bpos_eq(keys->data[idx].k->k.p, pos) &&
!keys->data[idx].overwritten) {
- mutex_lock(&keys->overwrite_lock);
+ guard(mutex)(&keys->overwrite_lock);
__bch2_journal_key_overwritten(keys, idx);
- mutex_unlock(&keys->overwrite_lock);
}
}
@@ -803,7 +814,7 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
void bch2_journal_keys_dump(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
pr_info("%zu keys:", keys->nr);
@@ -817,7 +828,6 @@ void bch2_journal_keys_dump(struct bch_fs *c)
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
pr_err("%s", buf.buf);
}
- printbuf_exit(&buf);
}
void bch2_fs_journal_keys_init(struct bch_fs *c)
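
The journal-key change above is subtle: accounting keys carry deltas, not absolute values, so when a second accounting key lands at the same position it must be summed into the existing one rather than replacing it (if the existing entry isn't heap-allocated, the new key takes the insert path instead, hence the goto). The accumulate step is conceptually just per-counter addition; a simplified sketch (the real bch2_accounting_accumulate() lives in disk_accounting.h):

#include <linux/types.h>

/* Sum src's counters into dst; both describe the same accounting pos. */
static void accounting_deltas_accumulate(u64 *dst, const u64 *src,
					 unsigned nr_counters)
{
	for (unsigned i = 0; i < nr_counters; i++)
		dst[i] += src[i];
}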
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 19d1bb806395..ebba14da92b4 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -301,13 +301,12 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans
struct btree_path *ck_path,
struct bkey_s_c k)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bpos_to_text(&buf, ck_path->pos);
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, trans->c, k);
trace_key_cache_fill(trans, buf.buf);
- printbuf_exit(&buf);
}
static noinline int btree_key_cache_fill(struct btree_trans *trans,
@@ -540,10 +539,10 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
- struct btree_trans *trans = bch2_trans_get(c);
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
int ret = 0;
+ CLASS(btree_trans, trans)(c);
btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read);
key = ck->key;
@@ -566,8 +565,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
BCH_TRANS_COMMIT_journal_reclaim, false));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
-
- bch2_trans_put(trans);
return ret;
}
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index bed2b4b6ffb9..38c5643e8a78 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -159,13 +159,11 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans
count_event(c, trans_restart_would_deadlock);
if (trace_trans_restart_would_deadlock_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
+ guard(printbuf_atomic)(&buf);
- buf.atomic++;
print_cycle(&buf, g);
-
trace_trans_restart_would_deadlock(trans, buf.buf);
- printbuf_exit(&buf);
}
}
@@ -196,8 +194,8 @@ static int btree_trans_abort_preference(struct btree_trans *trans)
static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
{
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
+ CLASS(printbuf, buf)();
+ guard(printbuf_atomic)(&buf);
prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
@@ -214,7 +212,6 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
}
bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
BUG();
}
@@ -692,7 +689,7 @@ int __bch2_btree_path_upgrade(struct btree_trans *trans,
count_event(trans->c, trans_restart_upgrade);
if (trace_trans_restart_upgrade_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "%s %pS\n", trans->fn, (void *) _RET_IP_);
prt_printf(&buf, "btree %s pos\n", bch2_btree_id_str(path->btree_id));
@@ -708,7 +705,6 @@ int __bch2_btree_path_upgrade(struct btree_trans *trans,
path->l[f.l].lock_seq);
trace_trans_restart_upgrade(trans->c, buf.buf);
- printbuf_exit(&buf);
}
out:
bch2_trans_verify_locks(trans);
@@ -777,7 +773,7 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st
goto out;
if (trace_trans_restart_relock_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bpos_to_text(&buf, path->pos);
prt_printf(&buf, " %s l=%u seq=%u node seq=",
@@ -797,7 +793,6 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st
}
trace_trans_restart_relock(trans, ip, buf.buf);
- printbuf_exit(&buf);
}
count_event(trans->c, trans_restart_relock);
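
guard(printbuf_atomic) replaces the manual buf.atomic++ bracketing. A nonzero atomic count tells the printbuf it may be used in a context that cannot sleep, so growth allocations must not block; the guard form makes the increment/decrement pairing impossible to miss on an early return. Presumably declared in fs/bcachefs/printbuf.h along these lines (an assumption):

#include <linux/cleanup.h>

DEFINE_GUARD(printbuf_atomic, struct printbuf *,
	     _T->atomic++,	/* entering atomic section */
	     _T->atomic--)	/* leaving it, on any scope exit */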
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 365808b4b7c0..d997e3818c30 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -65,16 +65,6 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n
memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
}
-static inline u64 bkey_journal_seq(struct bkey_s_c k)
-{
- switch (k.k->type) {
- case KEY_TYPE_inode_v3:
- return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq);
- default:
- return 0;
- }
-}
-
static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
{
const struct found_btree_node *l = _l;
@@ -206,17 +196,15 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
		n.journal_seq = le64_to_cpu(bn->keys.journal_seq);
n.sectors_written = b->written;
- mutex_lock(&f->lock);
+ guard(mutex)(&f->lock);
if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
bch_err(c, "try_read_btree_node() can't handle endian conversion");
f->ret = -EINVAL;
- goto unlock;
+ return;
}
if (darray_push(&f->nodes, n))
f->ret = -ENOMEM;
-unlock:
- mutex_unlock(&f->lock);
}
}
@@ -226,15 +214,17 @@ static int read_btree_nodes_worker(void *p)
struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
struct bch_dev *ca = w->ca;
unsigned long last_print = jiffies;
+ struct btree *b = NULL;
+ struct bio *bio = NULL;
- struct btree *b = __bch2_btree_node_mem_alloc(c);
+ b = __bch2_btree_node_mem_alloc(c);
if (!b) {
bch_err(c, "read_btree_nodes_worker: error allocating buf");
w->f->ret = -ENOMEM;
goto err;
}
- struct bio *bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
+ bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
if (!bio) {
bch_err(c, "read_btree_nodes_worker: error allocating bio");
w->f->ret = -ENOMEM;
@@ -369,7 +359,7 @@ static int handle_overwrites(struct bch_fs *c,
int bch2_scan_for_btree_nodes(struct bch_fs *c)
{
struct find_btree_nodes *f = &c->found_btree_nodes;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
found_btree_nodes nodes_heap = {};
size_t dst;
int ret = 0;
@@ -476,7 +466,6 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
err:
darray_exit(&nodes_heap);
- printbuf_exit(&buf);
return ret;
}
@@ -548,7 +537,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
return ret;
if (c->opts.verbose) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "recovery ");
bch2_btree_id_level_to_text(&buf, btree, level);
@@ -558,7 +547,6 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
bch2_bpos_to_text(&buf, node_max);
bch_info(c, "%s(): %s", __func__, buf.buf);
- printbuf_exit(&buf);
}
struct found_btree_node search = {
@@ -582,10 +570,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
found_btree_node_to_key(&tmp.k, &n);
if (c->opts.verbose) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
- printbuf_exit(&buf);
}
BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k),
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 7fcf248a9a76..58590ccc26bd 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -235,10 +235,10 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
- struct btree_trans *trans = bch2_trans_get(c);
unsigned long old, new;
unsigned idx = w - b->writes;
+ CLASS(btree_trans, trans)(c);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
old = READ_ONCE(b->flags);
@@ -257,8 +257,6 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
btree_node_write_if_need(trans, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
-
- bch2_trans_put(trans);
return 0;
}
@@ -591,7 +589,8 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
}
static inline int
-bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
+bch2_trans_commit_write_locked(struct btree_trans *trans,
+ enum bch_trans_commit_flags flags,
struct btree_insert_entry **stopped_at,
unsigned long trace_ip)
{
@@ -673,16 +672,20 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct bkey_i *accounting;
- percpu_down_read(&c->mark_lock);
- for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
- accounting != btree_trans_subbuf_top(trans, &trans->accounting);
- accounting = bkey_next(accounting)) {
- ret = bch2_accounting_trans_commit_hook(trans,
- bkey_i_to_accounting(accounting), flags);
- if (ret)
- goto revert_fs_usage;
- }
- percpu_up_read(&c->mark_lock);
+ scoped_guard(percpu_read, &c->mark_lock)
+ for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
+ accounting != btree_trans_subbuf_top(trans, &trans->accounting);
+ accounting = bkey_next(accounting)) {
+ ret = bch2_accounting_trans_commit_hook(trans,
+ bkey_i_to_accounting(accounting), flags);
+ if (unlikely(ret)) {
+ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
+ i != accounting;
+ i = bkey_next(i))
+ bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags);
+ return ret;
+ }
+ }
/* XXX: we only want to run this if deltas are nonzero */
bch2_trans_account_disk_usage_change(trans);
@@ -769,12 +772,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
trans->journal_res.offset += trans->journal_entries.u64s;
trans->journal_res.u64s -= trans->journal_entries.u64s;
- memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res,
- BCH_JSET_ENTRY_write_buffer_keys,
- BTREE_ID_accounting, 0,
- trans->accounting.u64s)->_data,
- btree_trans_subbuf_base(trans, &trans->accounting),
- trans->accounting.u64s);
+ if (trans->accounting.u64s)
+ memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res,
+ BCH_JSET_ENTRY_write_buffer_keys,
+ BTREE_ID_accounting, 0,
+ trans->accounting.u64s)->_data,
+ btree_trans_subbuf_base(trans, &trans->accounting),
+ trans->accounting.u64s);
if (trans->journal_seq)
*trans->journal_seq = trans->journal_res.seq;
@@ -794,13 +798,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
return 0;
fatal_err:
bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
- percpu_down_read(&c->mark_lock);
-revert_fs_usage:
- for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
- i != accounting;
- i = bkey_next(i))
- bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags);
- percpu_up_read(&c->mark_lock);
return ret;
}
@@ -826,7 +823,8 @@ static int bch2_trans_commit_journal_pin_flush(struct journal *j,
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
*/
-static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags,
+static inline int do_bch2_trans_commit(struct btree_trans *trans,
+ enum bch_trans_commit_flags flags,
struct btree_insert_entry **stopped_at,
unsigned long trace_ip)
{
@@ -962,16 +960,33 @@ out:
* do.
*/
static noinline int
-do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
+do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans,
+ enum bch_trans_commit_flags flags)
{
struct bch_fs *c = trans->c;
+ int ret = 0;
BUG_ON(current != c->recovery_task);
+ struct bkey_i *accounting;
+
+ percpu_down_read(&c->mark_lock);
+ for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
+ accounting != btree_trans_subbuf_top(trans, &trans->accounting);
+ accounting = bkey_next(accounting)) {
+ ret = likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))
+ ? bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(accounting),
+ BCH_ACCOUNTING_normal, false)
+ : 0;
+ if (ret)
+ goto revert_fs_usage;
+ }
+ percpu_up_read(&c->mark_lock);
+
trans_for_each_update(trans, i) {
- int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
+ ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
if (ret)
- return ret;
+ goto fatal_err;
}
for (struct jset_entry *i = btree_trans_journal_entries_start(trans);
@@ -980,9 +995,9 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
if (i->type == BCH_JSET_ENTRY_btree_keys ||
i->type == BCH_JSET_ENTRY_write_buffer_keys) {
jset_entry_for_each_key(i, k) {
- int ret = bch2_journal_key_insert(c, i->btree_id, i->level, k);
+ ret = bch2_journal_key_insert(c, i->btree_id, i->level, k);
if (ret)
- return ret;
+ goto fatal_err;
}
}
@@ -1000,15 +1015,27 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
i != btree_trans_subbuf_top(trans, &trans->accounting);
i = bkey_next(i)) {
- int ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i);
+ ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i);
if (ret)
- return ret;
+ goto fatal_err;
}
return 0;
+fatal_err:
+ bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
+ percpu_down_read(&c->mark_lock);
+revert_fs_usage:
+	/* error path not handled by __bch2_trans_commit() */
+	BUG();
+ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
+ i != accounting;
+ i = bkey_next(i))
+ bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags);
+ percpu_up_read(&c->mark_lock);
+ return ret;
}
-int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
+int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags flags)
{
struct btree_insert_entry *errored_at = NULL;
struct bch_fs *c = trans->c;
@@ -1031,7 +1058,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) {
if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags)))
- ret = do_bch2_trans_commit_to_journal_replay(trans);
+ ret = do_bch2_trans_commit_to_journal_replay(trans, flags);
else
ret = bch_err_throw(c, erofs_trans_commit);
goto out_reset;
@@ -1039,11 +1066,15 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
- journal_u64s = jset_u64s(trans->accounting.u64s);
+ journal_u64s = 0;
+
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
if (trans->journal_transaction_names)
journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
+ if (trans->accounting.u64s)
+ journal_u64s += jset_u64s(trans->accounting.u64s);
+
trans_for_each_update(trans, i) {
struct btree_path *path = trans->paths + i->path;
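
The accounting hunks in this file share one shape: walk the transaction's accounting subbuffer applying each delta, and on failure walk back over only the entries already applied, reverting them, all while holding mark_lock for read. Reduced to a skeleton (apply_one()/revert_one() are hypothetical stand-ins for the mem_mod/revert hooks):

static int apply_accounting(struct bkey_i *base, struct bkey_i *top)
{
	for (struct bkey_i *k = base; k != top; k = bkey_next(k)) {
		int ret = apply_one(k);
		if (unlikely(ret)) {
			/* undo exactly the prefix that succeeded */
			for (struct bkey_i *i = base; i != k; i = bkey_next(i))
				revert_one(i);
			return ret;
		}
	}
	return 0;
}

Moving the revert loop inside the scoped_guard(percpu_read, ...) in bch2_trans_commit_write_locked() keeps the lock held across both apply and revert, which the old label-based path had to express with a goto into a region that tracked lock state by hand.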
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index 5d9e02370aff..f514a8ad7a89 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -661,21 +661,23 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
* @k: key to insert
* @disk_res: must be non-NULL whenever inserting or potentially
* splitting data extents
- * @flags: transaction commit flags
+ * @commit_flags: transaction commit flags
* @iter_flags: btree iter update trigger flags
*
* Returns: 0 on success, error code on failure
*/
int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
- struct disk_reservation *disk_res, int flags,
+ struct disk_reservation *disk_res,
+ enum bch_trans_commit_flags commit_flags,
enum btree_iter_update_trigger_flags iter_flags)
{
- return bch2_trans_commit_do(c, disk_res, NULL, flags,
- bch2_btree_insert_trans(trans, id, k, iter_flags));
+ CLASS(btree_trans, trans)(c);
+ return commit_do(trans, disk_res, NULL, commit_flags,
+ bch2_btree_insert_trans(trans, id, k, iter_flags));
}
-int bch2_btree_delete_at(struct btree_trans *trans,
- struct btree_iter *iter, unsigned update_flags)
+int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter,
+ enum btree_iter_update_trigger_flags flags)
{
struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
int ret = PTR_ERR_OR_ZERO(k);
@@ -684,12 +686,12 @@ int bch2_btree_delete_at(struct btree_trans *trans,
bkey_init(&k->k);
k->k.p = iter->pos;
- return bch2_trans_update(trans, iter, k, update_flags);
+ return bch2_trans_update(trans, iter, k, flags);
}
int bch2_btree_delete(struct btree_trans *trans,
enum btree_id btree, struct bpos pos,
- unsigned update_flags)
+ enum btree_iter_update_trigger_flags flags)
{
struct btree_iter iter;
int ret;
@@ -698,7 +700,7 @@ int bch2_btree_delete(struct btree_trans *trans,
BTREE_ITER_cached|
BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(trans, &iter) ?:
- bch2_btree_delete_at(trans, &iter, update_flags);
+ bch2_btree_delete_at(trans, &iter, flags);
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -706,7 +708,7 @@ int bch2_btree_delete(struct btree_trans *trans,
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end,
- unsigned update_flags,
+ enum btree_iter_update_trigger_flags flags,
u64 *journal_seq)
{
u32 restart_count = trans->restart_count;
@@ -714,7 +716,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bkey_s_c k;
int ret = 0;
- bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent);
+ bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent|flags);
while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(trans->c, 0);
@@ -747,7 +749,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
bpos_min(end, k.k->p).offset -
iter.pos.offset);
- ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
+ ret = bch2_trans_update(trans, &iter, &delete, flags) ?:
bch2_trans_commit(trans, &disk_res, journal_seq,
BCH_TRANS_COMMIT_no_enospc);
bch2_disk_reservation_put(trans->c, &disk_res);
@@ -777,12 +779,11 @@ err:
*/
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
- unsigned update_flags,
+ enum btree_iter_update_trigger_flags flags,
u64 *journal_seq)
{
- int ret = bch2_trans_run(c,
- bch2_btree_delete_range_trans(trans, id, start, end,
- update_flags, journal_seq));
+ CLASS(btree_trans, trans)(c);
+ int ret = bch2_btree_delete_range_trans(trans, id, start, end, flags, journal_seq);
if (ret == -BCH_ERR_transaction_restart_nested)
ret = 0;
return ret;
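The CLASS(btree_trans, trans)(c) conversions in this hunk, and throughout the series, use the kernel's scope-based cleanup machinery from <linux/cleanup.h>: the transaction is torn down automatically when the variable leaves scope, so early returns cannot leak it. A sketch of the presumed class definition, by analogy with the DEFINE_CLASS(bch2_device_lookup, ...) added later in this diff — the actual declaration is not shown here:

    /* assumed definition: constructor bch2_trans_get(), destructor
     * bch2_trans_put(), the latter invoked automatically at scope exit */
    DEFINE_CLASS(btree_trans, struct btree_trans *,
                 bch2_trans_put(_T), bch2_trans_get(c), struct bch_fs *c);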
@@ -876,31 +877,31 @@ static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
va_list args)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_vprintf(&buf, fmt, args);
unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
if (ret)
- goto err;
+ return ret;
if (!test_bit(JOURNAL_running, &c->journal.flags)) {
ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s));
if (ret)
- goto err;
+ return ret;
struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0);
c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
- ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,
- bch2_trans_log_msg(trans, &buf));
+ CLASS(btree_trans, trans)(c);
+ ret = commit_do(trans, NULL, NULL, commit_flags,
+ bch2_trans_log_msg(trans, &buf));
}
-err:
- printbuf_exit(&buf);
- return ret;
+
+ return ret;
}
__printf(2, 3)
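__bch2_fs_log_msg() above also shows the printbuf half of the conversion: CLASS(printbuf, buf)() replaces the PRINTBUF initializer plus a manual printbuf_exit(), which is what lets the err: label go away. The class is presumably declared along these lines (an assumption; the exact definition is not visible in this diff):

    DEFINE_CLASS(printbuf, struct printbuf,
                 printbuf_exit(&_T), PRINTBUF, void);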
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 2c6f9b44d888..633de3b3ac28 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -47,22 +47,27 @@ enum bch_trans_commit_flags {
void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags);
-int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
-int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *,
+ enum btree_iter_update_trigger_flags);
+int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos,
+ enum btree_iter_update_trigger_flags);
int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
struct bkey_i *, enum btree_iter_update_trigger_flags);
int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
enum btree_iter_update_trigger_flags);
-int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct
- disk_reservation *, int flags, enum
- btree_iter_update_trigger_flags iter_flags);
+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
+ struct disk_reservation *,
+ enum bch_trans_commit_flags,
+ enum btree_iter_update_trigger_flags);
int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
- struct bpos, struct bpos, unsigned, u64 *);
+ struct bpos, struct bpos,
+ enum btree_iter_update_trigger_flags, u64 *);
int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
- struct bpos, struct bpos, unsigned, u64 *);
+ struct bpos, struct bpos,
+ enum btree_iter_update_trigger_flags, u64 *);
int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool);
int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
@@ -186,19 +191,29 @@ int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bk
int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *);
+static inline int bch2_btree_write_buffer_insert_checks(struct bch_fs *c, enum btree_id btree,
+ struct bkey_i *k)
+{
+ if (unlikely(!btree_type_uses_write_buffer(btree) ||
+ k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX)) {
+ int ret = bch2_btree_write_buffer_insert_err(c, btree, k);
+ dump_stack();
+ return ret;
+ }
+
+ return 0;
+}
+
static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree,
struct bkey_i *k)
{
kmsan_check_memory(k, bkey_bytes(&k->k));
- EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
-
- if (unlikely(!btree_type_uses_write_buffer(btree))) {
- int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k);
- dump_stack();
+ int ret = bch2_btree_write_buffer_insert_checks(trans->c, btree, k);
+ if (unlikely(ret))
return ret;
- }
+
/*
* Most updates skip the btree write buffer until journal replay is
* finished because synchronization with journal replay relies on having
@@ -215,7 +230,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
return bch2_btree_insert_clone_trans(trans, btree, k);
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
- int ret = PTR_ERR_OR_ZERO(e);
+ ret = PTR_ERR_OR_ZERO(e);
if (ret)
return ret;
@@ -226,7 +241,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
-int __bch2_trans_commit(struct btree_trans *, unsigned);
+int __bch2_trans_commit(struct btree_trans *, enum bch_trans_commit_flags);
int bch2_trans_log_str(struct btree_trans *, const char *);
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
@@ -263,6 +278,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_flags)))
+/* deprecated, prefer CLASS(btree_trans) */
#define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \
bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
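On the deprecation note above: bch2_trans_commit_do() bundles transaction creation, the commit retry loop, and teardown into one macro, hiding the transaction's lifetime. With CLASS(btree_trans) the lifetime is an ordinary local scope, and callers use commit_do() directly — the converted call sites in btree_update.c earlier in this diff all follow this shape:

    CLASS(btree_trans, trans)(c);
    int ret = commit_do(trans, disk_res, NULL, commit_flags,
                        bch2_btree_insert_trans(trans, id, k, iter_flags));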
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 8e3d3db2c53b..312ef203b27b 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -53,7 +53,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
: b->data->min_key;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct bkey_buf prev;
int ret = 0;
@@ -133,7 +133,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
out:
bch2_btree_and_journal_iter_exit(&iter);
bch2_bkey_buf_exit(&prev, c);
- printbuf_exit(&buf);
return ret;
err:
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
@@ -240,9 +239,8 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
__btree_node_free(trans, b);
- mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
- mutex_unlock(&c->btree_cache.lock);
+ scoped_guard(mutex, &c->btree_cache.lock)
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
six_unlock_write(&b->c.lock);
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
@@ -268,9 +266,8 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
clear_btree_node_dirty_acct(c, b);
clear_btree_node_need_write(b);
- mutex_lock(&c->btree_cache.lock);
- __bch2_btree_node_hash_remove(&c->btree_cache, b);
- mutex_unlock(&c->btree_cache.lock);
+ scoped_guard(mutex, &c->btree_cache.lock)
+ __bch2_btree_node_hash_remove(&c->btree_cache, b);
BUG_ON(p->nr >= ARRAY_SIZE(p->b));
p->b[p->nr++] = b;
@@ -285,7 +282,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct closure *cl,
bool interior_node,
unsigned target,
- unsigned flags)
+ enum bch_trans_commit_flags flags)
{
struct bch_fs *c = trans->c;
struct write_point *wp;
@@ -305,13 +302,18 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
mutex_lock(&c->btree_reserve_cache_lock);
if (c->btree_reserve_cache_nr > nr_reserve) {
- struct btree_alloc *a =
- &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
-
- bkey_copy(&b->key, &a->k);
- b->ob = a->ob;
- mutex_unlock(&c->btree_reserve_cache_lock);
- goto out;
+ for (struct btree_alloc *a = c->btree_reserve_cache;
+ a < c->btree_reserve_cache + c->btree_reserve_cache_nr;
+ a++) {
+ if (target && !bch2_bkey_in_target(c, bkey_i_to_s_c(&a->k), target))
+ continue;
+
+ bkey_copy(&b->key, &a->k);
+ b->ob = a->ob;
+ *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+ mutex_unlock(&c->btree_reserve_cache_lock);
+ goto out;
+ }
}
mutex_unlock(&c->btree_reserve_cache_lock);
retry:
@@ -555,7 +557,8 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
as->start_time);
- mutex_lock(&c->btree_interior_update_lock);
+ guard(mutex)(&c->btree_interior_update_lock);
+
list_del(&as->unwritten_list);
list_del(&as->list);
@@ -567,8 +570,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
* since being on btree_interior_update_list is our ref on @c:
*/
closure_wake_up(&c->btree_interior_update_wait);
-
- mutex_unlock(&c->btree_interior_update_lock);
}
static void btree_update_add_key(struct btree_update *as,
@@ -597,12 +598,11 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as)
{
struct bch_fs *c = as->c;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
for_each_keylist_key(&as->new_keys, k)
bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k));
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
/*
@@ -654,7 +654,7 @@ static void btree_update_nodes_written(struct btree_update *as)
{
struct bch_fs *c = as->c;
struct btree *b;
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
u64 journal_seq = 0;
unsigned i;
int ret;
@@ -813,15 +813,15 @@ err:
bch2_journal_pin_drop(&c->journal, &as->journal);
- mutex_lock(&c->btree_interior_update_lock);
- for (i = 0; i < as->nr_new_nodes; i++) {
- b = as->new_nodes[i];
+ scoped_guard(mutex, &c->btree_interior_update_lock) {
+ for (i = 0; i < as->nr_new_nodes; i++) {
+ b = as->new_nodes[i];
- BUG_ON(b->will_make_reachable != (unsigned long) as);
- b->will_make_reachable = 0;
- clear_btree_node_will_make_reachable(b);
+ BUG_ON(b->will_make_reachable != (unsigned long) as);
+ b->will_make_reachable = 0;
+ clear_btree_node_will_make_reachable(b);
+ }
}
- mutex_unlock(&c->btree_interior_update_lock);
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
@@ -835,7 +835,6 @@ err:
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
bch2_btree_update_free(as, trans);
- bch2_trans_put(trans);
}
static void btree_interior_update_work(struct work_struct *work)
@@ -845,12 +844,12 @@ static void btree_interior_update_work(struct work_struct *work)
struct btree_update *as;
while (1) {
- mutex_lock(&c->btree_interior_update_lock);
- as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
- struct btree_update, unwritten_list);
- if (as && !as->nodes_written)
- as = NULL;
- mutex_unlock(&c->btree_interior_update_lock);
+ scoped_guard(mutex, &c->btree_interior_update_lock) {
+ as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
+ struct btree_update, unwritten_list);
+ if (as && !as->nodes_written)
+ as = NULL;
+ }
if (!as)
break;
@@ -864,9 +863,8 @@ static CLOSURE_CALLBACK(btree_update_set_nodes_written)
closure_type(as, struct btree_update, cl);
struct bch_fs *c = as->c;
- mutex_lock(&c->btree_interior_update_lock);
- as->nodes_written = true;
- mutex_unlock(&c->btree_interior_update_lock);
+ scoped_guard(mutex, &c->btree_interior_update_lock)
+ as->nodes_written = true;
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
}
@@ -884,7 +882,7 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level);
- mutex_lock(&c->btree_interior_update_lock);
+ guard(mutex)(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
as->mode = BTREE_UPDATE_node;
@@ -893,8 +891,6 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
-
- mutex_unlock(&c->btree_interior_update_lock);
}
static int bch2_update_reparent_journal_pin_flush(struct journal *j,
@@ -933,11 +929,11 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
b->c.btree_id, b->c.level,
insert, insert->k.u64s);
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+ scoped_guard(mutex, &c->btree_interior_update_lock) {
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
- as->mode = BTREE_UPDATE_root;
- mutex_unlock(&c->btree_interior_update_lock);
+ as->mode = BTREE_UPDATE_root;
+ }
}
/*
@@ -958,7 +954,8 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
closure_get(&as->cl);
- mutex_lock(&c->btree_interior_update_lock);
+ guard(mutex)(&c->btree_interior_update_lock);
+
BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes));
BUG_ON(b->will_make_reachable);
@@ -966,8 +963,6 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
b->will_make_reachable = 1UL|(unsigned long) as;
set_btree_node_will_make_reachable(b);
- mutex_unlock(&c->btree_interior_update_lock);
-
btree_update_add_key(as, &as->new_keys, b);
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
@@ -986,31 +981,29 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
{
struct btree_update *as;
unsigned long v;
- unsigned i;
- mutex_lock(&c->btree_interior_update_lock);
- /*
- * When b->will_make_reachable != 0, it owns a ref on as->cl that's
- * dropped when it gets written by bch2_btree_complete_write - the
- * xchg() is for synchronization with bch2_btree_complete_write:
- */
- v = xchg(&b->will_make_reachable, 0);
- clear_btree_node_will_make_reachable(b);
- as = (struct btree_update *) (v & ~1UL);
+ scoped_guard(mutex, &c->btree_interior_update_lock) {
+ /*
+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's
+ * dropped when it gets written by bch2_btree_complete_write - the
+ * xchg() is for synchronization with bch2_btree_complete_write:
+ */
+ v = xchg(&b->will_make_reachable, 0);
+ clear_btree_node_will_make_reachable(b);
+ as = (struct btree_update *) (v & ~1UL);
- if (!as) {
- mutex_unlock(&c->btree_interior_update_lock);
- return;
- }
+ if (!as)
+ return;
- for (i = 0; i < as->nr_new_nodes; i++)
- if (as->new_nodes[i] == b)
- goto found;
+ unsigned i;
+ for (i = 0; i < as->nr_new_nodes; i++)
+ if (as->new_nodes[i] == b)
+ goto found;
- BUG();
-found:
- array_remove_item(as->new_nodes, as->nr_new_nodes, i);
- mutex_unlock(&c->btree_interior_update_lock);
+ BUG();
+ found:
+ array_remove_item(as->new_nodes, as->nr_new_nodes, i);
+ }
if (v & 1)
closure_put(&as->cl);
@@ -1139,7 +1132,8 @@ static const char * const btree_node_reawrite_reason_strs[] = {
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned level_start, bool split,
- unsigned target, unsigned flags)
+ unsigned target,
+ enum bch_trans_commit_flags flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
@@ -1226,9 +1220,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
bch2_keylist_init(&as->new_keys, as->_new_keys);
bch2_keylist_init(&as->parent_keys, as->inline_keys);
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->list, &c->btree_interior_update_list);
- mutex_unlock(&c->btree_interior_update_lock);
+ scoped_guard(mutex, &c->btree_interior_update_lock)
+ list_add_tail(&as->list, &c->btree_interior_update_list);
struct btree *b = btree_path_node(path, path->level);
as->node_start = b->data->min_key;
@@ -1312,13 +1305,11 @@ err:
static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
{
/* Root nodes cannot be reaped */
- mutex_lock(&c->btree_cache.lock);
- list_del_init(&b->list);
- mutex_unlock(&c->btree_cache.lock);
+ scoped_guard(mutex, &c->btree_cache.lock)
+ list_del_init(&b->list);
- mutex_lock(&c->btree_root_lock);
- bch2_btree_id_root(c, b->c.btree_id)->b = b;
- mutex_unlock(&c->btree_root_lock);
+ scoped_guard(mutex, &c->btree_root_lock)
+ bch2_btree_id_root(c, b->c.btree_id)->b = b;
bch2_recalc_btree_reserve(c);
}
@@ -1373,7 +1364,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
{
struct bch_fs *c = as->c;
struct bkey_packed *k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
unsigned long old, new;
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
@@ -1418,8 +1409,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
new |= BTREE_WRITE_interior;
new |= 1 << BTREE_NODE_need_write;
} while (!try_cmpxchg(&b->flags, &old, new));
-
- printbuf_exit(&buf);
}
static int
@@ -1446,7 +1435,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
int ret = bch2_btree_node_check_topology(trans, b);
if (ret) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
for (struct bkey_i *k = keys->keys;
k != insert;
@@ -1833,7 +1822,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
bch2_verify_keylist_sorted(keys);
if (!btree_node_intent_locked(path, b->c.level)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "%s(): node not locked at level %u\n",
__func__, b->c.level);
@@ -1842,7 +1831,6 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
return -EIO;
}
@@ -1965,9 +1953,8 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
bch2_trans_node_add(trans, path, n);
six_unlock_intent(&n->c.lock);
- mutex_lock(&c->btree_cache.lock);
- list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list);
- mutex_unlock(&c->btree_cache.lock);
+ scoped_guard(mutex, &c->btree_cache.lock)
+ list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list);
bch2_trans_verify_locks(trans);
}
@@ -2067,7 +2054,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
}
if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
printbuf_indent_add_nextline(&buf, 2);
prt_printf(&buf, "%s(): ", __func__);
@@ -2082,7 +2069,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_bpos_to_text(&buf, next->data->min_key);
bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
goto err;
}
@@ -2222,7 +2208,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
struct btree *b,
unsigned target,
- unsigned flags)
+ enum bch_trans_commit_flags flags)
{
struct bch_fs *c = trans->c;
struct btree *n, *parent;
@@ -2287,7 +2273,8 @@ err:
int bch2_btree_node_rewrite_key(struct btree_trans *trans,
enum btree_id btree, unsigned level,
- struct bkey_i *k, unsigned flags)
+ struct bkey_i *k,
+ enum bch_trans_commit_flags flags)
{
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter,
@@ -2311,7 +2298,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bpos pos,
unsigned target,
- unsigned flags)
+ enum bch_trans_commit_flags flags)
{
BUG_ON(!level);
@@ -2330,7 +2317,8 @@ err:
}
int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans,
- struct btree *b, unsigned flags)
+ struct btree *b,
+ enum bch_trans_commit_flags flags)
{
struct btree_iter iter;
int ret = get_iter_to_node(trans, &iter, b);
@@ -2363,9 +2351,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
!bch2_err_matches(ret, EROFS))
bch_err_fn_ratelimited(c, ret);
- spin_lock(&c->btree_node_rewrites_lock);
- list_del(&a->list);
- spin_unlock(&c->btree_node_rewrites_lock);
+ scoped_guard(spinlock, &c->btree_node_rewrites_lock)
+ list_del(&a->list);
closure_wake_up(&c->btree_node_rewrites_wait);
@@ -2390,16 +2377,16 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
bool now = false, pending = false;
- spin_lock(&c->btree_node_rewrites_lock);
- if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) &&
- enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
- list_add(&a->list, &c->btree_node_rewrites);
- now = true;
- } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {
- list_add(&a->list, &c->btree_node_rewrites_pending);
- pending = true;
+ scoped_guard(spinlock, &c->btree_node_rewrites_lock) {
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) &&
+ enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
+ list_add(&a->list, &c->btree_node_rewrites);
+ now = true;
+ } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {
+ list_add(&a->list, &c->btree_node_rewrites_pending);
+ pending = true;
+ }
}
- spin_unlock(&c->btree_node_rewrites_lock);
if (now) {
queue_work(c->btree_node_rewrite_worker, &a->work);
@@ -2420,13 +2407,14 @@ void bch2_async_btree_node_rewrites_flush(struct bch_fs *c)
void bch2_do_pending_node_rewrites(struct bch_fs *c)
{
while (1) {
- spin_lock(&c->btree_node_rewrites_lock);
- struct async_btree_rewrite *a =
- list_pop_entry(&c->btree_node_rewrites_pending,
- struct async_btree_rewrite, list);
- if (a)
- list_add(&a->list, &c->btree_node_rewrites);
- spin_unlock(&c->btree_node_rewrites_lock);
+ struct async_btree_rewrite *a;
+
+ scoped_guard(spinlock, &c->btree_node_rewrites_lock) {
+ a = list_pop_entry(&c->btree_node_rewrites_pending,
+ struct async_btree_rewrite, list);
+ if (a)
+ list_add(&a->list, &c->btree_node_rewrites);
+ }
if (!a)
break;
@@ -2439,11 +2427,11 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
void bch2_free_pending_node_rewrites(struct bch_fs *c)
{
while (1) {
- spin_lock(&c->btree_node_rewrites_lock);
- struct async_btree_rewrite *a =
- list_pop_entry(&c->btree_node_rewrites_pending,
- struct async_btree_rewrite, list);
- spin_unlock(&c->btree_node_rewrites_lock);
+ struct async_btree_rewrite *a;
+
+ scoped_guard(spinlock, &c->btree_node_rewrites_lock)
+ a = list_pop_entry(&c->btree_node_rewrites_pending,
+ struct async_btree_rewrite, list);
if (!a)
break;
@@ -2525,7 +2513,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c);
if (new_hash) {
- mutex_lock(&c->btree_cache.lock);
+ guard(mutex)(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
__bch2_btree_node_hash_remove(&c->btree_cache, b);
@@ -2533,7 +2521,6 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
bkey_copy(&b->key, new_key);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
BUG_ON(ret);
- mutex_unlock(&c->btree_cache.lock);
} else {
bkey_copy(&b->key, new_key);
}
@@ -2544,9 +2531,8 @@ out:
return ret;
err:
if (new_hash) {
- mutex_lock(&c->btree_cache.lock);
+ guard(mutex)(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, b);
- mutex_unlock(&c->btree_cache.lock);
}
goto out;
}
@@ -2681,7 +2667,8 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id
void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
{
- bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level)));
+ CLASS(btree_trans, trans)(c);
+ lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level));
}
static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
@@ -2714,21 +2701,15 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_update *as;
- mutex_lock(&c->btree_interior_update_lock);
+ guard(mutex)(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
bch2_btree_update_to_text(out, as);
- mutex_unlock(&c->btree_interior_update_lock);
}
static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
{
- bool ret;
-
- mutex_lock(&c->btree_interior_update_lock);
- ret = !list_empty(&c->btree_interior_update_list);
- mutex_unlock(&c->btree_interior_update_lock);
-
- return ret;
+ guard(mutex)(&c->btree_interior_update_lock);
+ return !list_empty(&c->btree_interior_update_list);
}
bool bch2_btree_interior_updates_flush(struct bch_fs *c)
@@ -2745,13 +2726,11 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry
{
struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
- mutex_lock(&c->btree_root_lock);
+ guard(mutex)(&c->btree_interior_update_lock);
r->level = entry->level;
r->alive = true;
bkey_copy(&r->key, (struct bkey_i *) entry->start);
-
- mutex_unlock(&c->btree_root_lock);
}
struct jset_entry *
@@ -2759,11 +2738,9 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
struct jset_entry *end,
unsigned long skip)
{
- unsigned i;
+ guard(mutex)(&c->btree_interior_update_lock);
- mutex_lock(&c->btree_root_lock);
-
- for (i = 0; i < btree_id_nr_alive(c); i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
if (r->alive && !test_bit(i, &skip)) {
@@ -2773,8 +2750,6 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
}
}
- mutex_unlock(&c->btree_root_lock);
-
return end;
}
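Most of the churn in btree_update_interior.c is mechanical lock-guard conversion; the payoff is visible in hunks like btree_update_drop_new_node(), where early returns no longer need a paired unlock. The before/after shape, condensed:

    /* before: every exit path unlocks by hand */
    mutex_lock(&c->btree_interior_update_lock);
    if (!as) {
            mutex_unlock(&c->btree_interior_update_lock);
            return;
    }
    mutex_unlock(&c->btree_interior_update_lock);

    /* after: the unlock is attached to the block scope, and a
     * return from inside the block still releases the lock */
    scoped_guard(mutex, &c->btree_interior_update_lock) {
            if (!as)
                    return;
    }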
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index ac04e45a8515..6ed049f19a9a 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -175,15 +175,19 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
}
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
- struct btree *, unsigned, unsigned);
+ struct btree *, unsigned,
+ enum bch_trans_commit_flags);
int bch2_btree_node_rewrite_key(struct btree_trans *,
enum btree_id, unsigned,
- struct bkey_i *, unsigned);
+ struct bkey_i *,
+ enum bch_trans_commit_flags);
int bch2_btree_node_rewrite_pos(struct btree_trans *,
enum btree_id, unsigned,
- struct bpos, unsigned, unsigned);
+ struct bpos, unsigned,
+ enum bch_trans_commit_flags);
int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *,
- struct btree *, unsigned);
+ struct btree *,
+ enum bch_trans_commit_flags);
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
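The prototype changes in this header are part of the series-wide move from bare unsigned flag words to enum bch_trans_commit_flags / enum btree_iter_update_trigger_flags. Beyond documentation, typed parameters let compilers that warn on implicit enum-to-enum conversion (clang's -Wenum-conversion, for instance) catch flags passed to the wrong parameter — for example this hypothetical mixup, which an unsigned parameter would have accepted silently:

    /* commit flag where iter-trigger flags are expected: now a
     * candidate for an enum-conversion warning rather than silence */
    bch2_btree_delete_at(trans, &iter, BCH_TRANS_COMMIT_no_enospc);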
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 4b095235a0d2..9cfc3edce39a 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -259,9 +259,8 @@ out:
bch2_btree_write_buffer_journal_flush);
if (j->watermark) {
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
bch2_journal_set_watermark(j);
- spin_unlock(&j->lock);
}
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
@@ -270,7 +269,7 @@ out:
int bch2_btree_write_buffer_insert_err(struct bch_fs *c,
enum btree_id btree, struct bkey_i *k)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
bch2_btree_id_to_text(&buf, btree);
@@ -278,7 +277,6 @@ int bch2_btree_write_buffer_insert_err(struct bch_fs *c,
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
bch2_fs_inconsistent(c, "%s", buf.buf);
- printbuf_exit(&buf);
return -EROFS;
}
@@ -300,9 +298,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
bch2_trans_unlock(trans);
bch2_trans_begin(trans);
- mutex_lock(&wb->inc.lock);
- move_keys_from_inc_to_flushing(wb);
- mutex_unlock(&wb->inc.lock);
+ scoped_guard(mutex, &wb->inc.lock)
+ move_keys_from_inc_to_flushing(wb);
for (size_t i = 0; i < wb->flushing.keys.nr; i++) {
wb->sorted.data[i].idx = i;
@@ -330,10 +327,9 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
darray_for_each(wb->sorted, i) {
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
- if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
- ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k);
+ ret = bch2_btree_write_buffer_insert_checks(c, k->btree, &k->k);
+ if (unlikely(ret))
goto err;
- }
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
prefetch(&wb->flushing.keys.data[n->idx]);
@@ -534,9 +530,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq)
ret = bch2_journal_keys_to_write_buffer(c, buf);
if (!blocked && !ret) {
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
buf->need_flush_to_write_buffer = false;
- spin_unlock(&j->lock);
}
mutex_unlock(&j->buf_lock);
@@ -568,9 +563,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq,
* On memory allocation failure, bch2_btree_write_buffer_flush_locked()
* is not guaranteed to empty wb->inc:
*/
- mutex_lock(&wb->flushing.lock);
- ret = bch2_btree_write_buffer_flush_locked(trans);
- mutex_unlock(&wb->flushing.lock);
+ scoped_guard(mutex, &wb->flushing.lock)
+ ret = bch2_btree_write_buffer_flush_locked(trans);
} while (!ret &&
(fetch_from_journal_err ||
(wb->inc.pin.seq && wb->inc.pin.seq <= max_seq) ||
@@ -583,9 +577,10 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
struct journal_entry_pin *_pin, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ CLASS(btree_trans, trans)(c);
bool did_work = false;
- return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work));
+ return btree_write_buffer_flush_seq(trans, seq, &did_work);
}
int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
@@ -607,9 +602,9 @@ bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c)
if (bch2_journal_error(&c->journal))
return false;
+ CLASS(btree_trans, trans)(c);
bool did_work = false;
- bch2_trans_run(c, btree_write_buffer_flush_seq(trans,
- journal_cur_seq(&c->journal), &did_work));
+ btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work);
return did_work;
}
@@ -656,11 +651,10 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
if (trace_write_buffer_maybe_flush_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, referring_k);
trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf);
- printbuf_exit(&buf);
}
bch2_bkey_buf_reassemble(&tmp, c, referring_k);
@@ -691,11 +685,12 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
struct btree_write_buffer *wb = &c->btree_write_buffer;
int ret;
- mutex_lock(&wb->flushing.lock);
- do {
- ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
- } while (!ret && bch2_btree_write_buffer_should_flush(c));
- mutex_unlock(&wb->flushing.lock);
+ scoped_guard(mutex, &wb->flushing.lock) {
+ CLASS(btree_trans, trans)(c);
+ do {
+ ret = bch2_btree_write_buffer_flush_locked(trans);
+ } while (!ret && bch2_btree_write_buffer_should_flush(c));
+ }
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
}
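The write-buffer side of the shared-checks refactor: bch2_btree_write_buffer_flush_locked() previously rejected only non-write-buffer btrees here, while the u64s bound lived as a debug-only EBUG_ON at the update site. Routing everything through bch2_btree_write_buffer_insert_checks() (added in btree_update.h above) makes both checks unconditional at all three entry points, with the common call shape:

    int ret = bch2_btree_write_buffer_insert_checks(c, btree, k);
    if (unlikely(ret))
            return ret;     /* -EROFS; fs was already marked inconsistent */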
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index c351d21aca0b..e484cd6b90b0 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -89,11 +89,9 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{
- if (unlikely(!btree_type_uses_write_buffer(btree))) {
- int ret = bch2_btree_write_buffer_insert_err(c, btree, k);
- dump_stack();
+ int ret = bch2_btree_write_buffer_insert_checks(c, btree, k);
+ if (unlikely(ret))
return ret;
- }
EBUG_ON(!dst->seq);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index f25903c10e8a..5aab527e3e7c 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -71,13 +71,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
- struct bch_fs_usage_short ret;
-
- percpu_down_read(&c->mark_lock);
- ret = __bch2_fs_usage_read_short(c);
- percpu_up_read(&c->mark_lock);
-
- return ret;
+ guard(percpu_read)(&c->mark_lock);
+ return __bch2_fs_usage_read_short(c);
}
void bch2_dev_usage_to_text(struct printbuf *out,
@@ -113,10 +108,10 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
bool *do_update)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
- struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
+ CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev);
if (!ca) {
if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID,
trans, ptr_to_invalid_device,
@@ -138,7 +133,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
*do_update = true;
- goto out;
+ return 0;
}
enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
@@ -158,7 +153,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
} else {
/* this pointer will be dropped */
*do_update = true;
- goto out;
+ return 0;
}
}
@@ -208,7 +203,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
*do_update = true;
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
- goto out;
+ return 0;
if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type),
trans, ptr_bucket_data_type_mismatch,
@@ -224,14 +219,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
switch (g->data_type) {
case BCH_DATA_sb:
bch_err(c, "btree and superblock in the same bucket - cannot repair");
- ret = bch_err_throw(c, fsck_repair_unimplemented);
- goto out;
+ return bch_err_throw(c, fsck_repair_unimplemented);
case BCH_DATA_journal:
ret = bch2_dev_journal_bucket_delete(ca, PTR_BUCKET_NR(ca, &p.ptr));
bch_err_msg(c, ret, "error deleting journal bucket %zu",
PTR_BUCKET_NR(ca, &p.ptr));
if (ret)
- goto out;
+ return ret;
break;
}
@@ -265,10 +259,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
*do_update = true;
}
-out:
fsck_err:
- bch2_dev_put(ca);
- printbuf_exit(&buf);
return ret;
}
@@ -281,7 +272,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
const union bch_extent_entry *entry_c;
struct extent_ptr_decoded p = { 0 };
bool do_update = false;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
/* We don't yet do btree key updates correctly for when we're RW */
@@ -290,14 +281,14 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
if (ret)
- goto err;
+ return ret;
}
if (do_update) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
- goto err;
+ return ret;
scoped_guard(rcu)
bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev));
@@ -387,7 +378,7 @@ found:
BTREE_TRIGGER_norun);
bch2_trans_iter_exit(trans, &iter);
if (ret)
- goto err;
+ return ret;
if (level)
bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
@@ -396,7 +387,7 @@ found:
jset_u64s(new->k.u64s));
ret = PTR_ERR_OR_ZERO(e);
if (ret)
- goto err;
+ return ret;
journal_entry_set(e,
BCH_JSET_ENTRY_btree_root,
@@ -413,9 +404,8 @@ found:
bkey_copy(&b->key, new);
}
}
-err:
- printbuf_exit(&buf);
- return ret;
+
+ return 0;
}
static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf,
@@ -460,9 +450,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
{
struct bch_fs *c = trans->c;
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool inserting = sectors > 0;
- int ret = 0;
BUG_ON(!sectors);
@@ -474,9 +463,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen);
- ret = bucket_ref_update_err(trans, &buf, k, inserting,
- BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen);
- goto out;
+ return bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen);
}
if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) {
@@ -487,15 +475,12 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen);
- ret = bucket_ref_update_err(trans, &buf, k, inserting,
- BCH_FSCK_ERR_ptr_too_stale);
- goto out;
+ return bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_ptr_too_stale);
}
- if (b_gen != ptr->gen && ptr->cached) {
- ret = 1;
- goto out;
- }
+ if (b_gen != ptr->gen && ptr->cached)
+ return 1;
if (unlikely(b_gen != ptr->gen)) {
bch2_log_msg_start(c, &buf);
@@ -506,9 +491,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen);
- ret = bucket_ref_update_err(trans, &buf, k, inserting,
- BCH_FSCK_ERR_stale_dirty_ptr);
- goto out;
+ return bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_stale_dirty_ptr);
}
if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) {
@@ -518,9 +502,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
bch2_data_type_str(bucket_data_type),
bch2_data_type_str(ptr_data_type));
- ret = bucket_ref_update_err(trans, &buf, k, inserting,
+ return bucket_ref_update_err(trans, &buf, k, inserting,
BCH_FSCK_ERR_ptr_bucket_data_type_mismatch);
- goto out;
}
if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) {
@@ -531,16 +514,13 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
*bucket_sectors, sectors);
- ret = bucket_ref_update_err(trans, &buf, k, inserting,
- BCH_FSCK_ERR_bucket_sector_count_overflow);
sectors = -*bucket_sectors;
- goto out;
+ return bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_bucket_sector_count_overflow);
}
*bucket_sectors += sectors;
-out:
- printbuf_exit(&buf);
- return ret;
+ return 0;
}
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
@@ -550,7 +530,7 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
static int warned_disk_usage = 0;
bool warn = false;
- percpu_down_read(&c->mark_lock);
+ guard(percpu_read)(&c->mark_lock);
struct bch_fs_usage_base *src = &trans->fs_usage_delta;
s64 added = src->btree + src->data + src->reserved;
@@ -578,11 +558,10 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
this_cpu_sub(*c->online_reserved, added);
}
- preempt_disable();
- struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
- acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
- preempt_enable();
- percpu_up_read(&c->mark_lock);
+ scoped_guard(preempt) {
+ struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
+ acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
+ }
if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
bch2_trans_inconsistent(trans,
@@ -621,40 +600,34 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
bool insert = !(flags & BTREE_TRIGGER_overwrite);
- struct printbuf buf = PRINTBUF;
- int ret = 0;
+ CLASS(printbuf, buf)();
struct bkey_i_backpointer bp;
bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp);
*sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len;
- struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
+ CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev);
if (unlikely(!ca)) {
if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
- ret = bch_err_throw(c, trigger_pointer);
- goto err;
+ return bch_err_throw(c, trigger_pointer);
+ return 0;
}
struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
if (!bucket_valid(ca, bucket.offset)) {
if (insert) {
bch2_dev_bucket_missing(ca, bucket.offset);
- ret = bch_err_throw(c, trigger_pointer);
+ return bch_err_throw(c, trigger_pointer);
}
- goto err;
+ return 0;
}
if (flags & BTREE_TRIGGER_transactional) {
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
- ret = PTR_ERR_OR_ZERO(a) ?:
- __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert);
- if (ret)
- goto err;
-
- ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
- if (ret)
- goto err;
+ return PTR_ERR_OR_ZERO(a) ?:
+ __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert) ?:
+ bch2_bucket_backpointer_mod(trans, k, &bp, insert);
}
if (flags & BTREE_TRIGGER_gc) {
@@ -662,23 +635,22 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
p.ptr.dev,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- ret = bch_err_throw(c, trigger_pointer);
- goto err;
+ return bch_err_throw(c, trigger_pointer);
}
bucket_lock(g);
struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
- ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert);
+ int ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert);
alloc_to_bucket(g, new);
bucket_unlock(g);
- if (!ret)
- ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
+ if (ret)
+ return ret;
+
+ return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
}
-err:
- bch2_dev_put(ca);
- printbuf_exit(&buf);
- return ret;
+
+ return 0;
}
static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
@@ -738,14 +710,13 @@ err:
if (!m || !m->alive) {
gc_stripe_unlock(m);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ",
(u64) p.ec.idx);
bch2_bkey_val_to_text(&buf, c, k);
__bch2_inconsistent_error(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
return bch_err_throw(c, trigger_stripe_pointer);
}
@@ -996,7 +967,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
return PTR_ERR(a);
if (a->v.data_type && type && a->v.data_type != type) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s\n",
@@ -1012,7 +983,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
/* Always print, this is always fatal */
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
if (!ret)
ret = bch_err_throw(c, metadata_bucket_inconsistency);
goto err;
@@ -1034,7 +1004,6 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
- int ret = 0;
struct bucket *g = gc_bucket(ca, b);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
@@ -1062,8 +1031,7 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
g->dirty_sectors += sectors;
struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
bucket_unlock(g);
- ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
- return ret;
+ return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
err_unlock:
bucket_unlock(g);
err:
@@ -1125,10 +1093,10 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
+ struct bch_sb_layout layout;
- mutex_lock(&c->sb_lock);
- struct bch_sb_layout layout = ca->disk_sb.sb->layout;
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock)
+ layout = ca->disk_sb.sb->layout;
u64 bucket = 0;
unsigned i, bucket_sectors = 0;
@@ -1173,8 +1141,8 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c
int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca,
enum btree_iter_update_trigger_flags flags)
{
- int ret = bch2_trans_run(c,
- __bch2_trans_mark_dev_sb(trans, ca, flags));
+ CLASS(btree_trans, trans)(c);
+ int ret = __bch2_trans_mark_dev_sb(trans, ca, flags);
bch_err_fn(c, ret);
return ret;
}
@@ -1227,15 +1195,38 @@ bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b)
#define SECTORS_CACHE 1024
+static int disk_reservation_recalc_sectors_available(struct bch_fs *c,
+ struct disk_reservation *res,
+ u64 sectors, enum bch_reservation_flags flags)
+{
+ guard(mutex)(&c->sectors_available_lock);
+
+ percpu_u64_set(&c->pcpu->sectors_available, 0);
+ u64 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
+
+ if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
+ sectors = min(sectors, sectors_available);
+
+ if (sectors <= sectors_available ||
+ (flags & BCH_DISK_RESERVATION_NOFAIL)) {
+ atomic64_set(&c->sectors_available,
+ max_t(s64, 0, sectors_available - sectors));
+ this_cpu_add(*c->online_reserved, sectors);
+ res->sectors += sectors;
+ return 0;
+ } else {
+ atomic64_set(&c->sectors_available, sectors_available);
+ return bch_err_throw(c, ENOSPC_disk_reservation);
+ }
+}
+
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, enum bch_reservation_flags flags)
{
struct bch_fs_pcpu *pcpu;
u64 old, get;
- u64 sectors_available;
- int ret;
- percpu_down_read(&c->mark_lock);
+ guard(percpu_read)(&c->mark_lock);
preempt_disable();
pcpu = this_cpu_ptr(c->pcpu);
@@ -1246,9 +1237,10 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
do {
get = min((u64) sectors + SECTORS_CACHE, old);
- if (get < sectors) {
+ if (unlikely(get < sectors)) {
preempt_enable();
- goto recalculate;
+ return disk_reservation_recalc_sectors_available(c,
+ res, sectors, flags);
}
} while (!atomic64_try_cmpxchg(&c->sectors_available,
&old, old - get));
@@ -1259,36 +1251,8 @@ out:
pcpu->sectors_available -= sectors;
this_cpu_add(*c->online_reserved, sectors);
res->sectors += sectors;
-
preempt_enable();
- percpu_up_read(&c->mark_lock);
return 0;
-
-recalculate:
- mutex_lock(&c->sectors_available_lock);
-
- percpu_u64_set(&c->pcpu->sectors_available, 0);
- sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
-
- if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
- sectors = min(sectors, sectors_available);
-
- if (sectors <= sectors_available ||
- (flags & BCH_DISK_RESERVATION_NOFAIL)) {
- atomic64_set(&c->sectors_available,
- max_t(s64, 0, sectors_available - sectors));
- this_cpu_add(*c->online_reserved, sectors);
- res->sectors += sectors;
- ret = 0;
- } else {
- atomic64_set(&c->sectors_available, sectors_available);
- ret = bch_err_throw(c, ENOSPC_disk_reservation);
- }
-
- mutex_unlock(&c->sectors_available_lock);
- percpu_up_read(&c->mark_lock);
-
- return ret;
}
/* Startup/shutdown: */
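A locking note on the reservation refactor just above: guard(percpu_read)(&c->mark_lock) holds the reader lock for the whole function, so both the try_cmpxchg fast path and the tail call into disk_reservation_recalc_sectors_available() run under it, and it is dropped on any return. The guard is presumably declared with the stock helper, roughly (an assumption, by analogy with the mutex/spinlock guards in <linux/cleanup.h>):

    DEFINE_GUARD(percpu_read, struct percpu_rw_semaphore *,
                 percpu_down_read(_T), percpu_up_read(_T));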
diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c
index 832eff93acb6..ca341586920b 100644
--- a/fs/bcachefs/buckets_waiting_for_journal.c
+++ b/fs/bcachefs/buckets_waiting_for_journal.c
@@ -25,25 +25,20 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_
u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b,
unsigned dev, u64 bucket)
{
- struct buckets_waiting_for_journal_table *t;
u64 dev_bucket = (u64) dev << 56 | bucket;
- u64 ret = 0;
- mutex_lock(&b->lock);
- t = b->t;
+ guard(mutex)(&b->lock);
+
+ struct buckets_waiting_for_journal_table *t = b->t;
for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
- if (h->dev_bucket == dev_bucket) {
- ret = h->journal_seq;
- break;
- }
+ if (h->dev_bucket == dev_bucket)
+ return h->journal_seq;
}
- mutex_unlock(&b->lock);
-
- return ret;
+ return 0;
}
static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t,
@@ -92,12 +87,11 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
.journal_seq = journal_seq,
};
size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0;
- int ret = 0;
- mutex_lock(&b->lock);
+ guard(mutex)(&b->lock);
if (likely(bucket_table_insert(b->t, &new, flushed_seq)))
- goto out;
+ return 0;
t = b->t;
size = 1UL << t->bits;
@@ -109,8 +103,7 @@ realloc:
n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL);
if (!n) {
struct bch_fs *c = container_of(b, struct bch_fs, buckets_waiting_for_journal);
- ret = bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set);
- goto out;
+ return bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set);
}
retry_rehash:
@@ -143,10 +136,7 @@ retry_rehash:
pr_debug("took %zu rehashes, table at %zu/%lu elements",
nr_rehashes, nr_elements, 1UL << b->t->bits);
-out:
- mutex_unlock(&b->lock);
-
- return ret;
+ return 0;
}
void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c)
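In bch2_bucket_journal_seq_ready() above, the hash key packs the device index into the top byte of a u64. A worked example of the encoding:

    unsigned dev = 3;
    u64 bucket = 0x12345;
    u64 dev_bucket = (u64) dev << 56 | bucket;  /* == 0x0300000000012345 */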
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 5ea89aa2b0c4..467fc45e84fe 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -52,6 +52,11 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
return ca;
}
+DEFINE_CLASS(bch2_device_lookup, struct bch_dev *,
+ bch2_dev_put(_T),
+ bch2_device_lookup(c, dev, flags),
+ struct bch_fs *c, u64 dev, unsigned flags);
+
#if 0
static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
{
@@ -207,8 +212,6 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
{
- struct bch_dev *ca;
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -219,7 +222,7 @@ static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
arg.pad)
return -EINVAL;
- ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ struct bch_dev *ca = bch2_device_lookup(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
@@ -249,9 +252,6 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
{
- struct bch_dev *ca;
- int ret;
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -262,21 +262,16 @@ static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
arg.pad)
return -EINVAL;
- ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch2_dev_offline(c, ca, arg.flags);
- bch2_dev_put(ca);
- return ret;
+ return bch2_dev_offline(c, ca, arg.flags);
}
static long bch2_ioctl_disk_set_state(struct bch_fs *c,
struct bch_ioctl_disk_set_state arg)
{
- struct bch_dev *ca;
- int ret;
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -288,15 +283,12 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c,
arg.new_state >= BCH_MEMBER_STATE_NR)
return -EINVAL;
- ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
- if (ret)
- bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
-
- bch2_dev_put(ca);
+ int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
+ bch_err_msg(ca, ret, "setting device state");
return ret;
}
@@ -312,7 +304,7 @@ static int bch2_data_thread(void *arg)
{
struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
- ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
+ ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, &ctx->arg);
if (ctx->thr.ret == -BCH_ERR_device_offline)
ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
else {
@@ -349,14 +341,13 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
};
if (ctx->arg.op == BCH_DATA_OP_scrub) {
- struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
+ CLASS(bch2_dev_tryget_noerror, ca)(c, ctx->arg.scrub.dev);
if (ca) {
struct bch_dev_usage_full u;
bch2_dev_usage_full_read_fast(ca, &u);
for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
if (ctx->arg.scrub.data_types & BIT(i))
e.p.sectors_total += u.d[i].sectors;
- bch2_dev_put(ca);
}
} else {
e.p.sectors_total = bch2_fs_usage_read_short(c).used;
@@ -418,9 +409,8 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
struct bch_ioctl_fs_usage __user *user_arg)
{
struct bch_ioctl_fs_usage arg = {};
- darray_char replicas = {};
+ CLASS(darray_char, replicas)();
u32 replica_entries_bytes;
- int ret = 0;
if (!test_bit(BCH_FS_started, &c->flags))
return -EINVAL;
@@ -428,11 +418,11 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
return -EFAULT;
- ret = bch2_fs_replicas_usage_read(c, &replicas) ?:
+ int ret = bch2_fs_replicas_usage_read(c, &replicas) ?:
(replica_entries_bytes < replicas.nr ? -ERANGE : 0) ?:
copy_to_user_errcode(&user_arg->replicas, replicas.data, replicas.nr);
if (ret)
- goto err;
+ return ret;
struct bch_fs_usage_short u = bch2_fs_usage_read_short(c);
arg.capacity = c->capacity;
@@ -449,52 +439,41 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
&arg.persistent_reserved[i], 1);
}
- ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
-err:
- darray_exit(&replicas);
- return ret;
+ return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
}
static long bch2_ioctl_query_accounting(struct bch_fs *c,
struct bch_ioctl_query_accounting __user *user_arg)
{
struct bch_ioctl_query_accounting arg;
- darray_char accounting = {};
- int ret = 0;
+ CLASS(darray_char, accounting)();
if (!test_bit(BCH_FS_started, &c->flags))
return -EINVAL;
- ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?:
+ int ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?:
bch2_fs_accounting_read(c, &accounting, arg.accounting_types_mask) ?:
(arg.accounting_u64s * sizeof(u64) < accounting.nr ? -ERANGE : 0) ?:
copy_to_user_errcode(&user_arg->accounting, accounting.data, accounting.nr);
if (ret)
- goto err;
+ return ret;
arg.capacity = c->capacity;
arg.used = bch2_fs_usage_read_short(c).used;
arg.online_reserved = percpu_u64_get(c->online_reserved);
arg.accounting_u64s = accounting.nr / sizeof(u64);
- ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
-err:
- darray_exit(&accounting);
- return ret;
+ return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
}
/* obsolete, didn't allow for new data types: */
static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c,
struct bch_ioctl_dev_usage __user *user_arg)
{
- struct bch_ioctl_dev_usage arg;
- struct bch_dev_usage_full src;
- struct bch_dev *ca;
- unsigned i;
-
if (!test_bit(BCH_FS_started, &c->flags))
return -EINVAL;
+ struct bch_ioctl_dev_usage arg;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
return -EFAULT;
@@ -504,38 +483,32 @@ static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c,
arg.pad[2])
return -EINVAL;
- ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
- src = bch2_dev_usage_full_read(ca);
+ struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca);
arg.state = ca->mi.state;
arg.bucket_size = ca->mi.bucket_size;
arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
- for (i = 0; i < ARRAY_SIZE(arg.d); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(arg.d); i++) {
arg.d[i].buckets = src.d[i].buckets;
arg.d[i].sectors = src.d[i].sectors;
arg.d[i].fragmented = src.d[i].fragmented;
}
- bch2_dev_put(ca);
-
return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
}
static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
struct bch_ioctl_dev_usage_v2 __user *user_arg)
{
- struct bch_ioctl_dev_usage_v2 arg;
- struct bch_dev_usage_full src;
- struct bch_dev *ca;
- int ret = 0;
-
if (!test_bit(BCH_FS_started, &c->flags))
return -EINVAL;
+ struct bch_ioctl_dev_usage_v2 arg;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
return -EFAULT;
@@ -545,20 +518,20 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
arg.pad[2])
return -EINVAL;
- ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
- src = bch2_dev_usage_full_read(ca);
+ struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca);
arg.state = ca->mi.state;
arg.bucket_size = ca->mi.bucket_size;
arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR);
arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
- ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+ int ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
if (ret)
- goto err;
+ return ret;
for (unsigned i = 0; i < arg.nr_data_types; i++) {
struct bch_ioctl_dev_usage_type t = {
@@ -569,11 +542,10 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
if (ret)
- goto err;
+ return ret;
}
-err:
- bch2_dev_put(ca);
- return ret;
+
+ return 0;
}
static long bch2_ioctl_read_super(struct bch_fs *c,
@@ -590,13 +562,13 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
arg.pad)
return -EINVAL;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
if (arg.flags & BCH_READ_DEV) {
ca = bch2_device_lookup(c, arg.dev, arg.flags);
ret = PTR_ERR_OR_ZERO(ca);
if (ret)
- goto err_unlock;
+ return ret;
sb = ca->disk_sb.sb;
} else {
@@ -612,8 +584,6 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
vstruct_bytes(sb));
err:
bch2_dev_put(ca);
-err_unlock:
- mutex_unlock(&c->sb_lock);
return ret;
}
@@ -639,9 +609,6 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
static long bch2_ioctl_disk_resize(struct bch_fs *c,
struct bch_ioctl_disk_resize arg)
{
- struct bch_dev *ca;
- int ret;
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -649,22 +616,16 @@ static long bch2_ioctl_disk_resize(struct bch_fs *c,
arg.pad)
return -EINVAL;
- ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch2_dev_resize(c, ca, arg.nbuckets);
-
- bch2_dev_put(ca);
- return ret;
+ return bch2_dev_resize(c, ca, arg.nbuckets);
}
static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
struct bch_ioctl_disk_resize_journal arg)
{
- struct bch_dev *ca;
- int ret;
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -675,14 +636,11 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
if (arg.nbuckets > U32_MAX)
return -EINVAL;
- ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
-
- bch2_dev_put(ca);
- return ret;
+ return bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
}
#define BCH_IOCTL(_name, _argtype) \
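
The CLASS() conversions in these ioctl handlers declare a variable, run a constructor, and arrange for a destructor at scope exit, so every early return drops the device ref without a goto. Roughly how such a lookup class can be declared with DEFINE_CLASS() from <linux/cleanup.h> — a sketch only, the in-tree bcachefs definition may differ in detail:

	/*
	 * Sketch: a lookup class whose destructor runs on every exit path,
	 * including the IS_ERR() early returns above.
	 */
	DEFINE_CLASS(bch2_device_lookup, struct bch_dev *,
		     if (!IS_ERR_OR_NULL(_T)) bch2_dev_put(_T),	/* destructor */
		     bch2_device_lookup(c, dev, flags),		/* constructor */
		     struct bch_fs *c, u64 dev, unsigned flags);

	/*
	 * Usage, as in bch2_ioctl_disk_resize() above:
	 *
	 *	CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
	 *	if (IS_ERR(ca))
	 *		return PTR_ERR(ca);
	 *	return bch2_dev_resize(c, ca, arg.nbuckets);
	 */
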
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index a6795e73f0b9..b1ec38992852 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -361,7 +361,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
extent_nonce(version, crc_old), bio);
if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
" expected %0llx:%0llx got %0llx:%0llx (old type ",
__func__,
@@ -374,7 +374,6 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
bch2_prt_csum_type(&buf, new_csum_type);
prt_str(&buf, ")");
WARN_RATELIMIT(1, "%s", buf.buf);
- printbuf_exit(&buf);
return bch_err_throw(c, recompute_checksum);
}
@@ -438,23 +437,21 @@ const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
#ifdef __KERNEL__
static int __bch2_request_key(char *key_description, struct bch_key *key)
{
- struct key *keyring_key;
- const struct user_key_payload *ukp;
int ret;
- keyring_key = request_key(&key_type_user, key_description, NULL);
+ struct key *keyring_key = request_key(&key_type_user, key_description, NULL);
if (IS_ERR(keyring_key))
return PTR_ERR(keyring_key);
- down_read(&keyring_key->sem);
- ukp = dereference_key_locked(keyring_key);
- if (ukp->datalen == sizeof(*key)) {
- memcpy(key, ukp->data, ukp->datalen);
- ret = 0;
- } else {
- ret = -EINVAL;
+ scoped_guard(rwsem_read, &keyring_key->sem) {
+ const struct user_key_payload *ukp = dereference_key_locked(keyring_key);
+ if (ukp->datalen == sizeof(*key)) {
+ memcpy(key, ukp->data, ukp->datalen);
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
}
- up_read(&keyring_key->sem);
key_put(keyring_key);
return ret;
@@ -495,14 +492,13 @@ got_key:
int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
{
- struct printbuf key_description = PRINTBUF;
+ CLASS(printbuf, key_description)();
int ret;
prt_printf(&key_description, "bcachefs:");
pr_uuid(&key_description, sb->user_uuid.b);
ret = __bch2_request_key(key_description.buf, key);
- printbuf_exit(&key_description);
#ifndef __KERNEL__
if (ret) {
@@ -524,13 +520,12 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
int bch2_revoke_key(struct bch_sb *sb)
{
key_serial_t key_id;
- struct printbuf key_description = PRINTBUF;
+ CLASS(printbuf, key_description)();
prt_printf(&key_description, "bcachefs:");
pr_uuid(&key_description, sb->user_uuid.b);
key_id = request_key("user", key_description.buf, NULL, KEY_SPEC_USER_KEYRING);
- printbuf_exit(&key_description);
if (key_id < 0)
return errno;
@@ -584,34 +579,28 @@ err:
*/
int bch2_disable_encryption(struct bch_fs *c)
{
- struct bch_sb_field_crypt *crypt;
- struct bch_key key;
- int ret = -EINVAL;
-
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
- crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+ struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
if (!crypt)
- goto out;
+ return -EINVAL;
/* is key encrypted? */
- ret = 0;
if (bch2_key_is_encrypted(&crypt->key))
- goto out;
+ return 0;
- ret = bch2_decrypt_sb_key(c, crypt, &key);
+ struct bch_key key;
+ int ret = bch2_decrypt_sb_key(c, crypt, &key);
if (ret)
- goto out;
+ return ret;
crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC);
crypt->key.key = key;
SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0);
bch2_write_super(c);
-out:
- mutex_unlock(&c->sb_lock);
-
- return ret;
+ return 0;
}
/*
@@ -625,7 +614,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
struct bch_sb_field_crypt *crypt;
int ret = -EINVAL;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
/* Do we already have an encryption key? */
if (bch2_sb_field_get(c->disk_sb.sb, crypt))
@@ -669,7 +658,6 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1);
bch2_write_super(c);
err:
- mutex_unlock(&c->sb_lock);
memzero_explicit(&user_key, sizeof(user_key));
memzero_explicit(&key, sizeof(key));
return ret;
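
The guard(mutex) and scoped_guard(rwsem_read, ...) conversions in this file rely on scope-based cleanup: the compiler runs a destructor when the guard variable leaves scope, so every return path unlocks. The mechanism underneath is the compiler's cleanup attribute; a freestanding user-space demonstration, with a pthread mutex standing in for the kernel's:

	/* Build: gcc -std=gnu11 -pthread guard_demo.c -o guard_demo */
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	/* cleanup callback: receives a pointer to the guarded variable */
	static void mutex_unlock_cleanup(pthread_mutex_t **m)
	{
		pthread_mutex_unlock(*m);
		puts("unlocked at scope exit");
	}

	/* poor man's guard(): lock now, unlock when the variable dies */
	#define GUARD_MUTEX(m)						\
		pthread_mutex_t *_guard					\
		__attribute__((cleanup(mutex_unlock_cleanup))) =	\
		({ pthread_mutex_lock(m); (m); })

	static int work(int fail)
	{
		GUARD_MUTEX(&lock);

		if (fail)
			return -1;	/* no goto/err label needed */
		return 0;
	}

	int main(void)
	{
		printf("work(1) = %d\n", work(1));
		printf("work(0) = %d\n", work(0));
		return 0;
	}
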
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index 8e9264b5a84e..1c6d0cdca3c5 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -40,15 +40,13 @@ out:
void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
{
- spin_lock(&clock->timer_lock);
+ guard(spinlock)(&clock->timer_lock);
for (size_t i = 0; i < clock->timers.nr; i++)
if (clock->timers.data[i] == timer) {
min_heap_del(&clock->timers, i, &callbacks, NULL);
- break;
+ return;
}
-
- spin_unlock(&clock->timer_lock);
}
struct io_clock_wait {
@@ -133,28 +131,27 @@ void __bch2_increment_clock(struct io_clock *clock, u64 sectors)
struct io_timer *timer;
u64 now = atomic64_add_return(sectors, &clock->now);
- spin_lock(&clock->timer_lock);
+ guard(spinlock)(&clock->timer_lock);
+
while ((timer = get_expired_timer(clock, now)))
timer->fn(timer);
- spin_unlock(&clock->timer_lock);
}
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
{
- out->atomic++;
- spin_lock(&clock->timer_lock);
u64 now = atomic64_read(&clock->now);
printbuf_tabstop_push(out, 40);
prt_printf(out, "current time:\t%llu\n", now);
+ guard(printbuf_atomic)(out);
+ guard(spinlock)(&clock->timer_lock);
+
for (unsigned i = 0; i < clock->timers.nr; i++)
prt_printf(out, "%ps %ps:\t%llu\n",
clock->timers.data[i]->fn,
clock->timers.data[i]->fn2,
clock->timers.data[i]->expire);
- spin_unlock(&clock->timer_lock);
- --out->atomic;
}
void bch2_io_clock_exit(struct io_clock *clock)
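
Two details in the bch2_io_timers_to_text() hunk above: the prt_printf() calls that may grow the buffer run before the lock is taken, and the rest runs under guard(printbuf_atomic) — growing a printbuf normally means a sleeping allocation, which is not allowed under a spinlock, so the buffer is flipped to nonblocking mode first. A plausible shape for that guard, assuming it is just a counted increment of the buffer's atomic field (see printbuf.h for the real definition):

	/*
	 * Sketch: nonzero buf->atomic makes printbuf growth use a
	 * non-sleeping allocation, so printing under a spinlock is safe.
	 */
	DEFINE_GUARD(printbuf_atomic, struct printbuf *,
		     _T->atomic++,	/* on scope entry */
		     _T->atomic--)	/* on scope exit */
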
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index b37b1f325f0a..aeb9b9bd7d33 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -336,7 +336,7 @@ static int attempt_compress(struct bch_fs *c,
void *workspace,
void *dst, size_t dst_len,
void *src, size_t src_len,
- struct bch_compression_opt compression)
+ union bch_compression_opt compression)
{
enum bch_compression_type compression_type =
__bch2_compression_opt_to_type[compression.type];
@@ -426,7 +426,7 @@ static int attempt_compress(struct bch_fs *c,
static unsigned __bio_compress(struct bch_fs *c,
struct bio *dst, size_t *dst_len,
struct bio *src, size_t *src_len,
- struct bch_compression_opt compression)
+ union bch_compression_opt compression)
{
struct bbuf src_data = { NULL }, dst_data = { NULL };
void *workspace;
@@ -553,7 +553,7 @@ unsigned bch2_bio_compress(struct bch_fs *c,
compression_type =
__bio_compress(c, dst, dst_len, src, src_len,
- bch2_compression_decode(compression_opt));
+ (union bch_compression_opt){ .value = compression_opt });
dst->bi_iter.bi_size = orig_dst;
src->bi_iter.bi_size = orig_src;
@@ -579,30 +579,25 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
if ((c->sb.features & f) == f)
return 0;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
- if ((c->sb.features & f) == f) {
- mutex_unlock(&c->sb_lock);
+ if ((c->sb.features & f) == f)
return 0;
- }
ret = __bch2_fs_compress_init(c, c->sb.features|f);
- if (ret) {
- mutex_unlock(&c->sb_lock);
+ if (ret)
return ret;
- }
c->disk_sb.sb->features[0] |= cpu_to_le64(f);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
return 0;
}
int bch2_check_set_has_compressed_data(struct bch_fs *c,
unsigned compression_opt)
{
- unsigned compression_type = bch2_compression_decode(compression_opt).type;
+ unsigned int compression_type = ((union bch_compression_opt){ .value = compression_opt })
+ .type;
BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
@@ -683,7 +678,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
static u64 compression_opt_to_feature(unsigned v)
{
- unsigned type = bch2_compression_decode(v).type;
+ unsigned int type = ((union bch_compression_opt){ .value = v }).type;
return BIT_ULL(bch2_compression_opt_to_feature[type]);
}
@@ -703,7 +698,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
{
char *val = kstrdup(_val, GFP_KERNEL);
char *p = val, *type_str, *level_str;
- struct bch_compression_opt opt = { 0 };
+ union bch_compression_opt opt = { 0 };
int ret;
if (!val)
@@ -736,7 +731,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
opt.level = level;
}
- *res = bch2_compression_encode(opt);
+ *res = opt.value;
err:
kfree(val);
return ret;
@@ -744,7 +739,7 @@ err:
void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
{
- struct bch_compression_opt opt = bch2_compression_decode(v);
+ union bch_compression_opt opt = { .value = v };
if (opt.type < BCH_COMPRESSION_OPT_NR)
prt_str(out, bch2_compression_opts[opt.type]);
diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h
index bec2f05bfd52..667ddb91d47a 100644
--- a/fs/bcachefs/compress.h
+++ b/fs/bcachefs/compress.h
@@ -10,41 +10,27 @@ static const unsigned __bch2_compression_opt_to_type[] = {
#undef x
};
-struct bch_compression_opt {
- u8 type:4,
- level:4;
-};
-
-static inline struct bch_compression_opt __bch2_compression_decode(unsigned v)
-{
- return (struct bch_compression_opt) {
- .type = v & 15,
- .level = v >> 4,
+union bch_compression_opt {
+ u8 value;
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 type:4, level:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 level:4, type:4;
+#endif
};
-}
+};
static inline bool bch2_compression_opt_valid(unsigned v)
{
- struct bch_compression_opt opt = __bch2_compression_decode(v);
+ union bch_compression_opt opt = { .value = v };
return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level);
}
-static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
-{
- return bch2_compression_opt_valid(v)
- ? __bch2_compression_decode(v)
- : (struct bch_compression_opt) { 0 };
-}
-
-static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
-{
- return opt.type|(opt.level << 4);
-}
-
static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
{
- return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
+ return __bch2_compression_opt_to_type[((union bch_compression_opt){ .value = v }).type];
}
struct bch_write_op;
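
The union above replaces the open-coded encode/decode helpers; the __LITTLE_ENDIAN_BITFIELD / __BIG_ENDIAN_BITFIELD split is what keeps the {type, level} bitfields layout-identical to the old type | (level << 4) encoding on both byte orders. A user-space round-trip check of that claim, with the kernel endian macros swapped for the compiler's __BYTE_ORDER__:

	/* Build: gcc -std=gnu11 -Wall opt_union.c -o opt_union && ./opt_union */
	#include <assert.h>
	#include <stdint.h>

	union bch_compression_opt {
		uint8_t value;
		struct {
	#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
			uint8_t type:4, level:4;
	#else
			uint8_t level:4, type:4;
	#endif
		};
	};

	int main(void)
	{
		for (unsigned type = 0; type < 16; type++)
			for (unsigned level = 0; level < 16; level++) {
				/* the old encoding... */
				union bch_compression_opt opt =
					{ .value = type | (level << 4) };

				/* ...must decode through the bitfields */
				assert(opt.type == type && opt.level == level);
			}
		return 0;
	}
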
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index e848e210a9bf..ccedc93fe0ef 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -115,7 +115,7 @@ static void trace_io_move_finish2(struct data_update *u,
struct bkey_i *insert)
{
struct bch_fs *c = u->op.c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_newline(&buf);
@@ -131,7 +131,6 @@ static void trace_io_move_finish2(struct data_update *u,
prt_newline(&buf);
trace_io_move_finish(c, buf.buf);
- printbuf_exit(&buf);
}
noinline_for_stack
@@ -143,7 +142,7 @@ static void trace_io_move_fail2(struct data_update *m,
{
struct bch_fs *c = m->op.c;
struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
unsigned rewrites_found = 0;
if (!trace_io_move_fail_enabled())
@@ -187,7 +186,6 @@ static void trace_io_move_fail2(struct data_update *m,
}
trace_io_move_fail(c, buf.buf);
- printbuf_exit(&buf);
}
noinline_for_stack
@@ -196,7 +194,7 @@ static void trace_data_update2(struct data_update *m,
struct bkey_i *insert)
{
struct bch_fs *c = m->op.c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "\nold: ");
bch2_bkey_val_to_text(&buf, c, old);
@@ -206,7 +204,6 @@ static void trace_data_update2(struct data_update *m,
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
trace_data_update(c, buf.buf);
- printbuf_exit(&buf);
}
noinline_for_stack
@@ -215,7 +212,7 @@ static void trace_io_move_created_rebalance2(struct data_update *m,
struct bkey_i *insert)
{
struct bch_fs *c = m->op.c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts);
@@ -227,7 +224,6 @@ static void trace_io_move_created_rebalance2(struct data_update *m,
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
trace_io_move_created_rebalance(c, buf.buf);
- printbuf_exit(&buf);
this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]);
}
@@ -238,7 +234,7 @@ static int data_update_invalid_bkey(struct data_update *m,
struct bkey_i *insert)
{
struct bch_fs *c = m->op.c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_str(&buf, "about to insert invalid key in data update path");
@@ -254,7 +250,6 @@ static int data_update_invalid_bkey(struct data_update *m,
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
return bch_err_throw(c, invalid_bkey);
}
@@ -499,7 +494,8 @@ out:
int bch2_data_update_index_update(struct bch_write_op *op)
{
- return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
+ CLASS(btree_trans, trans)(op->c);
+ return __bch2_data_update_index_update(trans, op);
}
void bch2_data_update_read_done(struct data_update *m)
@@ -675,7 +671,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update
if (!m->read_done) {
prt_printf(out, "read:\n");
printbuf_indent_add(out, 2);
- bch2_read_bio_to_text(out, &m->rbio);
+ bch2_read_bio_to_text(out, m->op.c, &m->rbio);
} else {
prt_printf(out, "write:\n");
printbuf_indent_add(out, 2);
@@ -783,6 +779,9 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
darray_for_each(m->op.devs_have, i)
__clear_bit(*i, devs.d);
+ CLASS(printbuf, buf)();
+
+ guard(printbuf_atomic)(&buf);
guard(rcu)();
unsigned nr_replicas = 0, i;
@@ -794,7 +793,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
struct bch_dev_usage usage;
bch2_dev_usage_read_fast(ca, &usage);
- if (!dev_buckets_free(ca, usage, m->op.watermark))
+ u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark);
+
+ prt_printf(&buf, "%s=%llu ", ca->name, nr_free);
+
+ if (!nr_free)
continue;
nr_replicas += ca->mi.durability;
@@ -802,8 +805,10 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
break;
}
- if (!nr_replicas)
+ if (!nr_replicas) {
+ trace_data_update_done_no_rw_devs(c, buf.buf);
return bch_err_throw(c, data_update_done_no_rw_devs);
+ }
if (nr_replicas < m->op.nr_replicas)
return bch_err_throw(c, insufficient_devices);
return 0;
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 07c2a0f73cc2..33cb94f70b19 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -141,7 +141,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
return;
bch2_btree_node_io_lock(b);
- mutex_lock(&c->verify_lock);
+ guard(mutex)(&c->verify_lock);
if (!c->verify_ondisk) {
c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
@@ -172,14 +172,11 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
failed |= bch2_btree_verify_replica(c, b, p);
if (failed) {
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
- printbuf_exit(&buf);
}
out:
- mutex_unlock(&c->verify_lock);
bch2_btree_node_io_unlock(b);
}
@@ -367,17 +364,17 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
+ CLASS(btree_trans, trans)(i->c);
return bch2_debugfs_flush_buf(i) ?:
- bch2_trans_run(i->c,
- for_each_btree_key(trans, iter, i->id, i->from,
- BTREE_ITER_prefetch|
- BTREE_ITER_all_snapshots, k, ({
- bch2_bkey_val_to_text(&i->buf, i->c, k);
- prt_newline(&i->buf);
- bch2_trans_unlock(trans);
- i->from = bpos_successor(iter.pos);
- bch2_debugfs_flush_buf(i);
- }))) ?:
+ for_each_btree_key(trans, iter, i->id, i->from,
+ BTREE_ITER_prefetch|
+ BTREE_ITER_all_snapshots, k, ({
+ bch2_bkey_val_to_text(&i->buf, i->c, k);
+ prt_newline(&i->buf);
+ bch2_trans_unlock(trans);
+ i->from = bpos_successor(iter.pos);
+ bch2_debugfs_flush_buf(i);
+ })) ?:
i->ret;
}
@@ -404,15 +401,15 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
if (bpos_eq(SPOS_MAX, i->from))
return i->ret;
- return bch2_trans_run(i->c,
- for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({
- bch2_btree_node_to_text(&i->buf, i->c, b);
- i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
- ? bpos_successor(b->key.k.p)
- : b->key.k.p;
+ CLASS(btree_trans, trans)(i->c);
+ return for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({
+ bch2_btree_node_to_text(&i->buf, i->c, b);
+ i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
+ ? bpos_successor(b->key.k.p)
+ : b->key.k.p;
- drop_locks_do(trans, bch2_debugfs_flush_buf(i));
- }))) ?: i->ret;
+ drop_locks_do(trans, bch2_debugfs_flush_buf(i));
+ })) ?: i->ret;
}
static const struct file_operations btree_format_debug_ops = {
@@ -431,27 +428,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
+ CLASS(btree_trans, trans)(i->c);
return bch2_debugfs_flush_buf(i) ?:
- bch2_trans_run(i->c,
- for_each_btree_key(trans, iter, i->id, i->from,
- BTREE_ITER_prefetch|
- BTREE_ITER_all_snapshots, k, ({
- struct btree_path_level *l =
- &btree_iter_path(trans, &iter)->l[0];
- struct bkey_packed *_k =
- bch2_btree_node_iter_peek(&l->iter, l->b);
-
- if (bpos_gt(l->b->key.k.p, i->prev_node)) {
- bch2_btree_node_to_text(&i->buf, i->c, l->b);
- i->prev_node = l->b->key.k.p;
- }
-
- bch2_bfloat_to_text(&i->buf, l->b, _k);
- bch2_trans_unlock(trans);
- i->from = bpos_successor(iter.pos);
- bch2_debugfs_flush_buf(i);
- }))) ?:
- i->ret;
+ for_each_btree_key(trans, iter, i->id, i->from,
+ BTREE_ITER_prefetch|
+ BTREE_ITER_all_snapshots, k, ({
+ struct btree_path_level *l =
+ &btree_iter_path(trans, &iter)->l[0];
+ struct bkey_packed *_k =
+ bch2_btree_node_iter_peek(&l->iter, l->b);
+
+ if (bpos_gt(l->b->key.k.p, i->prev_node)) {
+ bch2_btree_node_to_text(&i->buf, i->c, l->b);
+ i->prev_node = l->b->key.k.p;
+ }
+
+ bch2_bfloat_to_text(&i->buf, l->b, _k);
+ bch2_trans_unlock(trans);
+ i->from = bpos_successor(iter.pos);
+ bch2_debugfs_flush_buf(i);
+ })) ?:
+ i->ret;
}
static const struct file_operations bfloat_failed_debug_ops = {
@@ -465,7 +462,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
struct btree *b)
{
if (!out->nr_tabstops)
- printbuf_tabstop_push(out, 32);
+ printbuf_tabstop_push(out, 36);
prt_printf(out, "%px ", b);
bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level);
@@ -512,8 +509,8 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
if (ret)
return ret;
- i->buf.atomic++;
scoped_guard(rcu) {
+ guard(printbuf_atomic)(&i->buf);
struct bucket_table *tbl =
rht_dereference_rcu(c->btree_cache.table.tbl,
&c->btree_cache.table);
@@ -528,7 +525,6 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
done = true;
}
}
- --i->buf.atomic;
} while (!done);
if (i->buf.allocation_failure)
@@ -771,7 +767,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]);
printbuf_indent_add(&i->buf, 2);
- mutex_lock(&s->lock);
+ guard(mutex)(&s->lock);
prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
@@ -802,8 +798,6 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
printbuf_indent_sub(&i->buf, 2);
}
- mutex_unlock(&s->lock);
-
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
i->iter++;
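
The debugfs hunks above (and similar ones in dirent.c and disk_accounting.c below) open-code what bch2_trans_run() used to wrap. Assuming the wrapper is the usual get/run/put statement expression (the authoritative macro lives in btree_iter.h), the two forms are equivalent — CLASS(btree_trans, ...) just moves the put to scope exit, which is what lets multi-statement bodies return early:

	/* Sketch of the wrapper being replaced; see btree_iter.h. */
	#define bch2_trans_run(_c, _do)					\
	({								\
		struct btree_trans *trans = bch2_trans_get(_c);		\
		int _ret = (_do);					\
		bch2_trans_put(trans);					\
		_ret;							\
	})

	/*
	 * With the CLASS() form, bch2_trans_put() runs automatically:
	 *
	 *	CLASS(btree_trans, trans)(i->c);
	 *	return bch2_debugfs_flush_buf(i) ?:
	 *		for_each_btree_key(trans, ...) ?:
	 *		i->ret;
	 */
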
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index ccbb0127b724..dd60c47528da 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -13,6 +13,7 @@
#include <linux/dcache.h>
+#if IS_ENABLED(CONFIG_UNICODE)
int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
const struct qstr *str, struct qstr *out_cf)
{
@@ -34,6 +35,7 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
*out_cf = (struct qstr) QSTR_INIT(buf, ret);
return 0;
}
+#endif
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
@@ -256,9 +258,11 @@ int bch2_dirent_init_name(struct bch_fs *c,
if (ret)
return ret;
+#if IS_ENABLED(CONFIG_UNICODE)
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len];
+ void *val_end = bkey_val_end(bkey_i_to_s(&dirent->k_i));
if (cf_name) {
cf_len = cf_name->len;
@@ -266,21 +270,20 @@ int bch2_dirent_init_name(struct bch_fs *c,
memcpy(cf_out, cf_name->name, cf_name->len);
} else {
cf_len = utf8_casefold(hash_info->cf_encoding, name,
- cf_out,
- bkey_val_end(bkey_i_to_s(&dirent->k_i)) - (void *) cf_out);
+ cf_out, val_end - (void *) cf_out);
if (cf_len <= 0)
return cf_len;
}
- memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_len], 0,
- bkey_val_bytes(&dirent->k) -
- offsetof(struct bch_dirent, d_cf_name_block.d_names) -
- name->len + cf_len);
+ void *name_end = &dirent->v.d_cf_name_block.d_names[name->len + cf_len];
+ BUG_ON(name_end > val_end);
+ memset(name_end, 0, val_end - name_end);
dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len);
dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len);
EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len);
+#endif
}
unsigned u64s = dirent_val_u64s(name->len, cf_len);
@@ -617,13 +620,12 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
const struct bch_hash_info *hash_info,
const struct qstr *name, subvol_inum *inum)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter = {};
int ret = lockrestart_do(trans,
bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return ret;
}
@@ -683,8 +685,8 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum,
struct bkey_buf sk;
bch2_bkey_buf_init(&sk);
- int ret = bch2_trans_run(c,
- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents,
POS(inum.inum, ctx->pos),
POS(inum.inum, U64_MAX),
inum.subvol, 0, k, ({
@@ -705,7 +707,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum,
continue;
ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target));
- })));
+ }));
bch2_bkey_buf_exit(&sk, c);
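
The memset rewrite in bch2_dirent_init_name() above also fixes an operator-precedence bug: the old length expression total - offset - name_len + cf_len parses as ((total - offset) - name_len) + cf_len, adding the casefolded length where it should be subtracted and zeroing past the intended range. A tiny illustration with made-up numbers:

	#include <assert.h>

	int main(void)
	{
		int total = 64, offset = 8, name_len = 10, cf_len = 10;

		int buggy   = total - offset - name_len + cf_len;	/* 56 */
		int correct = total - offset - (name_len + cf_len);	/* 36 */

		assert(buggy == 56 && correct == 36);	/* off by 2 * cf_len */
		return 0;
	}

The new code sidesteps the arithmetic entirely by computing name_end and val_end pointers and taking their difference, with a BUG_ON() guarding the bound.
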
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index 1e17199cc5c7..efb58d2dcf68 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -23,8 +23,16 @@ struct bch_fs;
struct bch_hash_info;
struct bch_inode_info;
+#if IS_ENABLED(CONFIG_UNICODE)
int bch2_casefold(struct btree_trans *, const struct bch_hash_info *,
const struct qstr *, struct qstr *);
+#else
+static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
+{
+ return bch_err_throw(trans->c, no_casefolding_without_utf8);
+}
+#endif
static inline int bch2_maybe_casefold(struct btree_trans *trans,
const struct bch_hash_info *info,
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index f7528cd69c73..219e37738aee 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -380,11 +380,10 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
accounting_pos_cmp, NULL);
if (trace_accounting_mem_insert_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_accounting_to_text(&buf, c, a.s_c);
trace_accounting_mem_insert(c, buf.buf);
- printbuf_exit(&buf);
}
return 0;
err:
@@ -404,9 +403,9 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
return bch_err_throw(c, btree_insert_need_mark_replicas);
percpu_up_read(&c->mark_lock);
- percpu_down_write(&c->mark_lock);
- int ret = __bch2_accounting_mem_insert(c, a);
- percpu_up_write(&c->mark_lock);
+ int ret;
+ scoped_guard(percpu_write, &c->mark_lock)
+ ret = __bch2_accounting_mem_insert(c, a);
percpu_down_read(&c->mark_lock);
return ret;
}
@@ -438,7 +437,7 @@ void bch2_accounting_mem_gc(struct bch_fs *c)
{
struct bch_accounting_mem *acc = &c->accounting;
- percpu_down_write(&c->mark_lock);
+ guard(percpu_write)(&c->mark_lock);
struct accounting_mem_entry *dst = acc->k.data;
darray_for_each(acc->k, src) {
@@ -453,7 +452,6 @@ void bch2_accounting_mem_gc(struct bch_fs *c)
acc->k.nr = dst - acc->k.data;
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
- percpu_up_write(&c->mark_lock);
}
/*
@@ -471,7 +469,7 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
darray_init(usage);
- percpu_down_read(&c->mark_lock);
+ guard(percpu_read)(&c->mark_lock);
darray_for_each(acc->k, i) {
union {
u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs,
@@ -494,7 +492,6 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
memcpy(&darray_top(*usage), &u.r, replicas_usage_bytes(&u.r));
usage->nr += replicas_usage_bytes(&u.r);
}
- percpu_up_read(&c->mark_lock);
if (ret)
darray_exit(usage);
@@ -509,7 +506,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
darray_init(out_buf);
- percpu_down_read(&c->mark_lock);
+ guard(percpu_read)(&c->mark_lock);
darray_for_each(acc->k, i) {
struct disk_accounting_pos a_p;
bpos_to_disk_accounting_pos(&a_p, i->pos);
@@ -533,8 +530,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
out_buf->nr += bkey_bytes(&a_out->k);
}
- percpu_up_read(&c->mark_lock);
-
if (ret)
darray_exit(out_buf);
return ret;
@@ -553,7 +548,7 @@ int bch2_gc_accounting_start(struct bch_fs *c)
struct bch_accounting_mem *acc = &c->accounting;
int ret = 0;
- percpu_down_write(&c->mark_lock);
+ guard(percpu_write)(&c->mark_lock);
darray_for_each(acc->k, e) {
e->v[1] = __alloc_percpu_gfp(e->nr_counters * sizeof(u64),
sizeof(u64), GFP_KERNEL);
@@ -565,20 +560,18 @@ int bch2_gc_accounting_start(struct bch_fs *c)
}
acc->gc_running = !ret;
- percpu_up_write(&c->mark_lock);
-
return ret;
}
int bch2_gc_accounting_done(struct bch_fs *c)
{
struct bch_accounting_mem *acc = &c->accounting;
- struct btree_trans *trans = bch2_trans_get(c);
- struct printbuf buf = PRINTBUF;
+ CLASS(btree_trans, trans)(c);
+ CLASS(printbuf, buf)();
struct bpos pos = POS_MIN;
int ret = 0;
- percpu_down_write(&c->mark_lock);
+ guard(percpu_write)(&c->mark_lock);
while (1) {
unsigned idx = eytzinger0_find_ge(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &pos);
@@ -622,7 +615,8 @@ int bch2_gc_accounting_done(struct bch_fs *c)
if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) {
percpu_up_write(&c->mark_lock);
- ret = commit_do(trans, NULL, NULL, 0,
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_skip_accounting_apply,
bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false));
percpu_down_write(&c->mark_lock);
if (ret)
@@ -637,20 +631,16 @@ int bch2_gc_accounting_done(struct bch_fs *c)
bkey_i_to_s_c_accounting(&k_i.k),
BCH_ACCOUNTING_normal, true);
- preempt_disable();
+ guard(preempt)();
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *src = &trans->fs_usage_delta;
acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
- preempt_enable();
}
}
}
}
err:
fsck_err:
- percpu_up_write(&c->mark_lock);
- printbuf_exit(&buf);
- bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
}
@@ -662,11 +652,9 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
if (k.k->type != KEY_TYPE_accounting)
return 0;
- percpu_down_read(&c->mark_lock);
- int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
- BCH_ACCOUNTING_read, false);
- percpu_up_read(&c->mark_lock);
- return ret;
+ guard(percpu_read)(&c->mark_lock);
+ return bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
+ BCH_ACCOUNTING_read, false);
}
static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
@@ -674,7 +662,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
u64 *v, unsigned nr)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0, invalid_dev = -1;
switch (acc->type) {
@@ -723,7 +711,6 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
}
fsck_err:
- printbuf_exit(&buf);
return ret;
invalid_device:
if (fsck_err(trans, accounting_to_invalid_device,
@@ -751,8 +738,8 @@ invalid_device:
int bch2_accounting_read(struct bch_fs *c)
{
struct bch_accounting_mem *acc = &c->accounting;
- struct btree_trans *trans = bch2_trans_get(c);
- struct printbuf buf = PRINTBUF;
+ CLASS(btree_trans, trans)(c);
+ CLASS(printbuf, buf)();
/*
* We might run more than once if we rewind to start topology repair or
@@ -761,13 +748,13 @@ int bch2_accounting_read(struct bch_fs *c)
*
* Instead, zero out any accounting we have:
*/
- percpu_down_write(&c->mark_lock);
- darray_for_each(acc->k, e)
- percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
- for_each_member_device(c, ca)
- percpu_memset(ca->usage, 0, sizeof(*ca->usage));
- percpu_memset(c->usage, 0, sizeof(*c->usage));
- percpu_up_write(&c->mark_lock);
+ scoped_guard(percpu_write, &c->mark_lock) {
+ darray_for_each(acc->k, e)
+ percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
+ for_each_member_device(c, ca)
+ percpu_memset(ca->usage, 0, sizeof(*ca->usage));
+ percpu_memset(c->usage, 0, sizeof(*c->usage));
+ }
struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN,
@@ -798,7 +785,7 @@ int bch2_accounting_read(struct bch_fs *c)
accounting_read_key(trans, k);
}));
if (ret)
- goto err;
+ return ret;
struct journal_keys *keys = &c->journal_keys;
struct journal_key *dst = keys->data;
@@ -837,14 +824,14 @@ int bch2_accounting_read(struct bch_fs *c)
ret = accounting_read_key(trans, k);
if (ret)
- goto err;
+ return ret;
}
*dst++ = *i;
}
keys->gap = keys->nr = dst - keys->data;
- percpu_down_write(&c->mark_lock);
+ guard(percpu_write)(&c->mark_lock);
darray_for_each_reverse(acc->k, i) {
struct disk_accounting_pos acc_k;
@@ -876,60 +863,55 @@ int bch2_accounting_read(struct bch_fs *c)
}
if (ret)
- goto fsck_err;
+ return ret;
}
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
- preempt_disable();
- struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
+ scoped_guard(preempt) {
+ struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
- for (unsigned i = 0; i < acc->k.nr; i++) {
- struct disk_accounting_pos k;
- bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
+ for (unsigned i = 0; i < acc->k.nr; i++) {
+ struct disk_accounting_pos k;
+ bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
- u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
- switch (k.type) {
- case BCH_DISK_ACCOUNTING_persistent_reserved:
- usage->reserved += v[0] * k.persistent_reserved.nr_replicas;
- break;
- case BCH_DISK_ACCOUNTING_replicas:
- fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]);
- break;
- case BCH_DISK_ACCOUNTING_dev_data_type: {
- guard(rcu)();
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev);
- if (ca) {
- struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type];
- percpu_u64_set(&d->buckets, v[0]);
- percpu_u64_set(&d->sectors, v[1]);
- percpu_u64_set(&d->fragmented, v[2]);
-
- if (k.dev_data_type.data_type == BCH_DATA_sb ||
- k.dev_data_type.data_type == BCH_DATA_journal)
- usage->hidden += v[0] * ca->mi.bucket_size;
+ switch (k.type) {
+ case BCH_DISK_ACCOUNTING_persistent_reserved:
+ usage->reserved += v[0] * k.persistent_reserved.nr_replicas;
+ break;
+ case BCH_DISK_ACCOUNTING_replicas:
+ fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]);
+ break;
+ case BCH_DISK_ACCOUNTING_dev_data_type: {
+ guard(rcu)();
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev);
+ if (ca) {
+ struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type];
+ percpu_u64_set(&d->buckets, v[0]);
+ percpu_u64_set(&d->sectors, v[1]);
+ percpu_u64_set(&d->fragmented, v[2]);
+
+ if (k.dev_data_type.data_type == BCH_DATA_sb ||
+ k.dev_data_type.data_type == BCH_DATA_journal)
+ usage->hidden += v[0] * ca->mi.bucket_size;
+ }
+ break;
+ }
}
- break;
- }
}
}
- preempt_enable();
-fsck_err:
- percpu_up_write(&c->mark_lock);
-err:
- printbuf_exit(&buf);
- bch2_trans_put(trans);
- bch_err_fn(c, ret);
+
return ret;
}
int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev)
{
- return bch2_trans_run(c,
- bch2_btree_write_buffer_flush_sync(trans) ?:
+ CLASS(btree_trans, trans)(c);
+ return bch2_btree_write_buffer_flush_sync(trans) ?:
for_each_btree_key_commit(trans, iter, BTREE_ID_accounting, POS_MIN,
BTREE_ITER_all_snapshots, k, NULL, NULL, 0, ({
struct disk_accounting_pos acc;
@@ -940,15 +922,16 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev)
? bch2_btree_bit_mod_buffered(trans, BTREE_ID_accounting, k.k->p, 0)
: 0;
})) ?:
- bch2_btree_write_buffer_flush_sync(trans));
+ bch2_btree_write_buffer_flush_sync(trans);
}
int bch2_dev_usage_init(struct bch_dev *ca, bool gc)
{
struct bch_fs *c = ca->fs;
+ CLASS(btree_trans, trans)(c);
u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 };
- int ret = bch2_trans_do(c, ({
+ int ret = lockrestart_do(trans, ({
bch2_disk_accounting_mod2(trans, gc,
v, dev_data_type,
.dev = ca->dev_idx,
@@ -964,78 +947,77 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
bool mismatch = false;
struct bch_fs_usage_base base = {}, base_inmem = {};
- bch2_trans_run(c,
- for_each_btree_key(trans, iter,
- BTREE_ID_accounting, POS_MIN,
- BTREE_ITER_all_snapshots, k, ({
- u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k);
- unsigned nr = bch2_accounting_counters(k.k);
+ CLASS(btree_trans, trans)(c);
+ for_each_btree_key(trans, iter,
+ BTREE_ID_accounting, POS_MIN,
+ BTREE_ITER_all_snapshots, k, ({
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k);
+ unsigned nr = bch2_accounting_counters(k.k);
- struct disk_accounting_pos acc_k;
- bpos_to_disk_accounting_pos(&acc_k, k.k->p);
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, k.k->p);
- if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
- break;
+ if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
+ break;
- if (!bch2_accounting_is_mem(&acc_k)) {
- struct disk_accounting_pos next;
- memset(&next, 0, sizeof(next));
- next.type = acc_k.type + 1;
- bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next));
- continue;
- }
+ if (!bch2_accounting_is_mem(&acc_k)) {
+ struct disk_accounting_pos next;
+ memset(&next, 0, sizeof(next));
+ next.type = acc_k.type + 1;
+ bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next));
+ continue;
+ }
- bch2_accounting_mem_read(c, k.k->p, v, nr);
+ bch2_accounting_mem_read(c, k.k->p, v, nr);
- if (memcmp(a.v->d, v, nr * sizeof(u64))) {
- struct printbuf buf = PRINTBUF;
+ if (memcmp(a.v->d, v, nr * sizeof(u64))) {
+ CLASS(printbuf, buf)();
- bch2_bkey_val_to_text(&buf, c, k);
- prt_str(&buf, " !=");
- for (unsigned j = 0; j < nr; j++)
- prt_printf(&buf, " %llu", v[j]);
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, " !=");
+ for (unsigned j = 0; j < nr; j++)
+ prt_printf(&buf, " %llu", v[j]);
- pr_err("%s", buf.buf);
- printbuf_exit(&buf);
- mismatch = true;
- }
+ pr_err("%s", buf.buf);
+ mismatch = true;
+ }
- switch (acc_k.type) {
- case BCH_DISK_ACCOUNTING_persistent_reserved:
- base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0];
- break;
- case BCH_DISK_ACCOUNTING_replicas:
- fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]);
- break;
- case BCH_DISK_ACCOUNTING_dev_data_type:
- {
- guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
- if (!ca)
- continue;
-
- v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets);
- v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors);
- v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented);
- }
+ switch (acc_k.type) {
+ case BCH_DISK_ACCOUNTING_persistent_reserved:
+ base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0];
+ break;
+ case BCH_DISK_ACCOUNTING_replicas:
+ fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]);
+ break;
+ case BCH_DISK_ACCOUNTING_dev_data_type: {
+ {
+ guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
+ if (!ca)
+ continue;
+
+ v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets);
+ v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors);
+ v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented);
+ }
- if (memcmp(a.v->d, v, 3 * sizeof(u64))) {
- struct printbuf buf = PRINTBUF;
+ if (memcmp(a.v->d, v, 3 * sizeof(u64))) {
+ CLASS(printbuf, buf)();
- bch2_bkey_val_to_text(&buf, c, k);
- prt_str(&buf, " in mem");
- for (unsigned j = 0; j < nr; j++)
- prt_printf(&buf, " %llu", v[j]);
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, " in mem");
+ for (unsigned j = 0; j < nr; j++)
+ prt_printf(&buf, " %llu", v[j]);
- pr_err("dev accounting mismatch: %s", buf.buf);
- printbuf_exit(&buf);
- mismatch = true;
- }
+ pr_err("dev accounting mismatch: %s", buf.buf);
+ mismatch = true;
}
+ }
+ }
- 0;
- })));
+ 0;
+ }));
acc_u64s_percpu(&base_inmem.hidden, &c->usage->hidden, sizeof(base_inmem) / sizeof(u64));
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index d61abebf3e0b..43f4b21d0aab 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -211,10 +211,8 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
{
- percpu_down_read(&trans->c->mark_lock);
- int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false);
- percpu_up_read(&trans->c->mark_lock);
- return ret;
+ guard(percpu_read)(&trans->c->mark_lock);
+ return bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false);
}
static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *acc,
@@ -236,13 +234,12 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
u64 *v, unsigned nr)
{
- percpu_down_read(&c->mark_lock);
+ guard(percpu_read)(&c->mark_lock);
struct bch_accounting_mem *acc = &c->accounting;
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &p);
bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
- percpu_up_read(&c->mark_lock);
}
static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
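
The percpu_down_read()/percpu_up_read() pairs in this header collapse into guard(percpu_read). A plausible declaration of the percpu rw-semaphore guards via DEFINE_GUARD() from <linux/cleanup.h> — the in-tree definitions may differ, this only shows the mechanism:

	DEFINE_GUARD(percpu_read,  struct percpu_rw_semaphore *,
		     percpu_down_read(_T),  percpu_up_read(_T))
	DEFINE_GUARD(percpu_write, struct percpu_rw_semaphore *,
		     percpu_down_write(_T), percpu_up_write(_T))

	/*
	 * Usage, as in bch2_accounting_mem_add() above:
	 *
	 *	guard(percpu_read)(&trans->c->mark_lock);
	 *	return bch2_accounting_mem_mod_locked(...);
	 */
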
diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c
index cde842ac1886..293e47268508 100644
--- a/fs/bcachefs/disk_groups.c
+++ b/fs/bcachefs/disk_groups.c
@@ -375,7 +375,7 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
{
bch2_printbuf_make_room(out, 4096);
- out->atomic++;
+ guard(printbuf_atomic)(out);
guard(rcu)();
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
@@ -396,16 +396,13 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
next:
prt_newline(out);
}
-
- out->atomic--;
}
void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
{
- out->atomic++;
+ guard(printbuf_atomic)(out);
guard(rcu)();
- __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v),
- --out->atomic;
+ __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v);
}
void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
@@ -471,14 +468,9 @@ int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
{
- int ret;
-
- mutex_lock(&c->sb_lock);
- ret = __bch2_dev_group_set(c, ca, name) ?:
+ guard(mutex)(&c->sb_lock);
+ return __bch2_dev_group_set(c, ca, name) ?:
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
- return ret;
}
int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
@@ -506,9 +498,8 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
return 0;
}
- mutex_lock(&c->sb_lock);
- g = bch2_disk_path_find(&c->disk_sb, val);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock)
+ g = bch2_disk_path_find(&c->disk_sb, val);
if (g >= 0) {
*res = group_to_target(g);
@@ -527,7 +518,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
prt_printf(out, "none");
return;
case TARGET_DEV: {
- out->atomic++;
+ guard(printbuf_atomic)(out);
guard(rcu)();
struct bch_dev *ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
@@ -539,8 +530,6 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
prt_printf(out, "offline device %u", t.dev);
else
prt_printf(out, "invalid device %u", t.dev);
-
- out->atomic--;
return;
}
case TARGET_GROUP:
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 687c3ba98095..8a8c94bd2580 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -197,8 +197,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bool parity = ptr_idx >= nr_data;
enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
s64 sectors = parity ? le16_to_cpu(s.v->sectors) : 0;
- struct printbuf buf = PRINTBUF;
- int ret = 0;
+ CLASS(printbuf, buf)();
struct bch_fs *c = trans->c;
if (deleting)
@@ -212,10 +211,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bch2_data_type_str(a->data_type),
a->dirty_sectors,
a->stripe, s.k->p.offset,
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = bch_err_throw(c, mark_stripe);
- goto err;
- }
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)))
+ return bch_err_throw(c, mark_stripe);
if (bch2_trans_inconsistent_on(parity && bch2_bucket_sectors_total(*a), trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u cached_sectors %u: data already in parity bucket\n%s",
@@ -223,30 +220,24 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bch2_data_type_str(a->data_type),
a->dirty_sectors,
a->cached_sectors,
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = bch_err_throw(c, mark_stripe);
- goto err;
- }
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)))
+ return bch_err_throw(c, mark_stripe);
} else {
if (bch2_trans_inconsistent_on(a->stripe != s.k->p.offset ||
a->stripe_redundancy != s.v->nr_redundant, trans,
"bucket %llu:%llu gen %u: not marked as stripe when deleting stripe (got %u)\n%s",
bucket.inode, bucket.offset, a->gen,
a->stripe,
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = bch_err_throw(c, mark_stripe);
- goto err;
- }
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)))
+ return bch_err_throw(c, mark_stripe);
if (bch2_trans_inconsistent_on(a->data_type != data_type, trans,
"bucket %llu:%llu gen %u data type %s: wrong data type when stripe, should be %s\n%s",
bucket.inode, bucket.offset, a->gen,
bch2_data_type_str(a->data_type),
bch2_data_type_str(data_type),
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = bch_err_throw(c, mark_stripe);
- goto err;
- }
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)))
+ return bch_err_throw(c, mark_stripe);
if (bch2_trans_inconsistent_on(parity &&
(a->dirty_sectors != -sectors ||
@@ -255,17 +246,15 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bucket.inode, bucket.offset, a->gen,
a->dirty_sectors,
a->cached_sectors,
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = bch_err_throw(c, mark_stripe);
- goto err;
- }
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)))
+ return bch_err_throw(c, mark_stripe);
}
if (sectors) {
- ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type,
- a->gen, a->data_type, &a->dirty_sectors);
+ int ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type,
+ a->gen, a->data_type, &a->dirty_sectors);
if (ret)
- goto err;
+ return ret;
}
if (!deleting) {
@@ -277,9 +266,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
a->stripe_redundancy = 0;
alloc_data_type_set(a, BCH_DATA_user);
}
-err:
- printbuf_exit(&buf);
- return ret;
+
+ return 0;
}
static int mark_stripe_bucket(struct btree_trans *trans,
@@ -289,14 +277,13 @@ static int mark_stripe_bucket(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx;
- struct printbuf buf = PRINTBUF;
- int ret = 0;
+ CLASS(printbuf, buf)();
- struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
+ CLASS(bch2_dev_tryget, ca)(c, ptr->dev);
if (unlikely(!ca)) {
if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite))
- ret = bch_err_throw(c, mark_stripe);
- goto err;
+ return bch_err_throw(c, mark_stripe);
+ return 0;
}
struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
@@ -312,36 +299,32 @@ static int mark_stripe_bucket(struct btree_trans *trans,
struct bkey_i_alloc_v4 *a =
bch2_trans_start_alloc_update(trans, bucket, 0);
- ret = PTR_ERR_OR_ZERO(a) ?:
+ int ret = PTR_ERR_OR_ZERO(a) ?:
__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?:
bch2_bucket_backpointer_mod(trans, s.s_c, &bp,
!(flags & BTREE_TRIGGER_overwrite));
if (ret)
- goto err;
+ return ret;
}
if (flags & BTREE_TRIGGER_gc) {
struct bucket *g = gc_bucket(ca, bucket.offset);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s",
ptr->dev,
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = bch_err_throw(c, mark_stripe);
- goto err;
- }
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)))
+ return bch_err_throw(c, mark_stripe);
bucket_lock(g);
struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
- ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
+ int ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
alloc_to_bucket(g, new);
bucket_unlock(g);
if (!ret)
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
+ return ret;
}
-err:
- bch2_dev_put(ca);
- printbuf_exit(&buf);
- return ret;
+
+ return 0;
}
static int mark_stripe_buckets(struct btree_trans *trans,
@@ -630,16 +613,15 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
struct bch_csum got = ec_block_checksum(buf, i, offset);
if (bch2_crc_cmp(want, got)) {
- struct bch_dev *ca = bch2_dev_tryget(c, v->ptrs[i].dev);
+ CLASS(bch2_dev_tryget, ca)(c, v->ptrs[i].dev);
if (ca) {
- struct printbuf err = PRINTBUF;
+ CLASS(printbuf, err)();
prt_str(&err, "stripe ");
bch2_csum_err_msg(&err, v->csum_type, want, got);
prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
bch_err_ratelimited(ca, "%s", err.buf);
- printbuf_exit(&err);
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
}
@@ -703,8 +685,8 @@ static void ec_block_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
int rw = ec_bio->rw;
unsigned ref = rw == READ
- ? BCH_DEV_READ_REF_ec_block
- : BCH_DEV_WRITE_REF_ec_block;
+ ? (unsigned) BCH_DEV_READ_REF_ec_block
+ : (unsigned) BCH_DEV_WRITE_REF_ec_block;
bch2_account_io_completion(ca, bio_data_dir(bio),
ec_bio->submit_time, !bio->bi_status);
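
The (unsigned) casts in this hunk and the next keep both arms of the conditional the same type: BCH_DEV_READ_REF_ec_block and BCH_DEV_WRITE_REF_ec_block come from two different enums, and mixing enum types in ?: draws a warning on clang (e.g. -Wenum-compare-conditional) and newer gcc. A minimal reproducer with made-up enums:

	enum read_ref  { READ_REF_A };
	enum write_ref { WRITE_REF_A };

	unsigned pick(int rw)
	{
		/* warns: enumeration type mismatch in conditional expression
		 * return rw ? READ_REF_A : WRITE_REF_A;
		 */

		/* casting both arms, as in ec_block_io() above, is quiet */
		return rw ? (unsigned) READ_REF_A : (unsigned) WRITE_REF_A;
	}
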
@@ -741,8 +723,8 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
: BCH_DATA_parity;
int rw = op_is_write(opf);
unsigned ref = rw == READ
- ? BCH_DEV_READ_REF_ec_block
- : BCH_DEV_WRITE_REF_ec_block;
+ ? (unsigned) BCH_DEV_READ_REF_ec_block
+ : (unsigned) BCH_DEV_WRITE_REF_ec_block;
struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref);
if (!ca) {
@@ -832,7 +814,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
struct bch_stripe *v;
unsigned i, offset;
const char *msg = NULL;
- struct printbuf msgbuf = PRINTBUF;
+ CLASS(printbuf, msgbuf)();
int ret = 0;
closure_init_stack(&cl);
@@ -894,7 +876,6 @@ err:
bch2_bkey_val_to_text(&msgbuf, c, orig_k);
bch_err_ratelimited(c,
"error doing reconstruct read: %s\n %s", msg, msgbuf.buf);
- printbuf_exit(&msgbuf);
ret = bch_err_throw(c, stripe_reconstruct);
goto out;
}
@@ -936,31 +917,22 @@ static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx)
static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx)
{
- bool ret = false;
-
- spin_lock(&c->ec_stripes_new_lock);
- ret = __bch2_stripe_is_open(c, idx);
- spin_unlock(&c->ec_stripes_new_lock);
-
- return ret;
+ guard(spinlock)(&c->ec_stripes_new_lock);
+ return __bch2_stripe_is_open(c, idx);
}
static bool bch2_try_open_stripe(struct bch_fs *c,
struct ec_stripe_new *s,
u64 idx)
{
- bool ret;
-
- spin_lock(&c->ec_stripes_new_lock);
- ret = !__bch2_stripe_is_open(c, idx);
+ guard(spinlock)(&c->ec_stripes_new_lock);
+ bool ret = !__bch2_stripe_is_open(c, idx);
if (ret) {
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
s->idx = idx;
hlist_add_head(&s->hash, &c->ec_stripes_new[hash]);
}
- spin_unlock(&c->ec_stripes_new_lock);
-
return ret;
}
@@ -968,9 +940,8 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
{
BUG_ON(!s->idx);
- spin_lock(&c->ec_stripes_new_lock);
+ guard(spinlock)(&c->ec_stripes_new_lock);
hlist_del_init(&s->hash);
- spin_unlock(&c->ec_stripes_new_lock);
s->idx = 0;
}
@@ -1063,7 +1034,7 @@ static int ec_stripe_key_update(struct btree_trans *trans,
unsigned sectors = stripe_blockcount_get(v, i);
if (!bch2_extent_ptr_eq(old->v.ptrs[i], new->v.ptrs[i]) && sectors) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "stripe changed nonempty block %u", i);
prt_str(&buf, "\nold: ");
@@ -1071,7 +1042,6 @@ static int ec_stripe_key_update(struct btree_trans *trans,
prt_str(&buf, "\nnew: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new->k_i));
bch2_fs_inconsistent(c, "%s", buf.buf);
- printbuf_exit(&buf);
ret = -EINVAL;
goto err;
}
@@ -1115,21 +1085,18 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
int ret, dev, block;
if (bp.v->level) {
- struct printbuf buf = PRINTBUF;
struct btree_iter node_iter;
- struct btree *b;
-
- b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed);
+ struct btree *b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed);
bch2_trans_iter_exit(trans, &node_iter);
if (!b)
return 0;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
bch2_bkey_val_to_text(&buf, c, bp.s_c);
bch2_fs_inconsistent(c, "%s", buf.buf);
- printbuf_exit(&buf);
return bch_err_throw(c, erasure_coding_found_btree_node);
}
@@ -1194,7 +1161,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct bch_extent_ptr ptr = v->ptrs[block];
int ret = 0;
- struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev);
+ CLASS(bch2_dev_tryget, ca)(c, ptr.dev);
if (!ca)
return bch_err_throw(c, ENOENT_dev_not_found);
@@ -1225,28 +1192,26 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
}));
bch2_bkey_buf_exit(&last_flushed, c);
- bch2_dev_put(ca);
return ret;
}
static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
unsigned nr_data = v->nr_blocks - v->nr_redundant;
int ret = bch2_btree_write_buffer_flush_sync(trans);
if (ret)
- goto err;
+ return ret;
for (unsigned i = 0; i < nr_data; i++) {
ret = ec_stripe_update_bucket(trans, s, i);
if (ret)
- break;
+ return ret;
}
-err:
- bch2_trans_put(trans);
- return ret;
+
+ return 0;
}
static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
@@ -1385,9 +1350,8 @@ err:
}
}
- mutex_lock(&c->ec_stripe_new_lock);
- list_del(&s->list);
- mutex_unlock(&c->ec_stripe_new_lock);
+ scoped_guard(mutex, &c->ec_stripe_new_lock)
+ list_del(&s->list);
wake_up(&c->ec_stripe_new_wait);
ec_stripe_buf_exit(&s->existing_stripe);
@@ -1401,15 +1365,11 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
{
struct ec_stripe_new *s;
- mutex_lock(&c->ec_stripe_new_lock);
+ guard(mutex)(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list)
if (!atomic_read(&s->ref[STRIPE_REF_io]))
- goto out;
- s = NULL;
-out:
- mutex_unlock(&c->ec_stripe_new_lock);
-
- return s;
+ return s;
+ return NULL;
}
static void ec_stripe_create_work(struct work_struct *work)
@@ -1443,9 +1403,8 @@ static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h
h->s = NULL;
s->pending = true;
- mutex_lock(&c->ec_stripe_new_lock);
- list_add(&s->list, &c->ec_stripe_new_list);
- mutex_unlock(&c->ec_stripe_new_lock);
+ scoped_guard(mutex, &c->ec_stripe_new_lock)
+ list_add(&s->list, &c->ec_stripe_new_list);
ec_stripe_new_put(c, s, STRIPE_REF_io);
}
@@ -2101,6 +2060,8 @@ allocated:
BUG_ON(trans->restarted);
return h;
err:
+ if (waiting)
+ closure_wake_up(&c->freelist_wait);
bch2_ec_stripe_head_put(c, h);
return ERR_PTR(ret);
}
@@ -2199,13 +2160,13 @@ static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, s
int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_max_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_max_commit(trans, iter,
BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX),
BTREE_ITER_intent, k,
NULL, NULL, 0, ({
bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags);
- })));
+ }));
bch_err_fn(c, ret);
return ret;
}
@@ -2215,33 +2176,28 @@ int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags)
static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
{
struct ec_stripe_head *h;
- struct open_bucket *ob;
- unsigned i;
- mutex_lock(&c->ec_stripe_head_lock);
+ guard(mutex)(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
- mutex_lock(&h->lock);
+ guard(mutex)(&h->lock);
if (!h->s)
- goto unlock;
+ continue;
if (!ca)
goto found;
- for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) {
+ for (unsigned i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) {
if (!h->s->blocks[i])
continue;
- ob = c->open_buckets + h->s->blocks[i];
+ struct open_bucket *ob = c->open_buckets + h->s->blocks[i];
if (ob->dev == ca->dev_idx)
goto found;
}
- goto unlock;
+ continue;
found:
ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
-unlock:
- mutex_unlock(&h->lock);
}
- mutex_unlock(&c->ec_stripe_head_lock);
}
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
@@ -2258,11 +2214,8 @@ static bool bch2_fs_ec_flush_done(struct bch_fs *c)
{
sched_annotate_sleep();
- mutex_lock(&c->ec_stripe_new_lock);
- bool ret = list_empty(&c->ec_stripe_new_list);
- mutex_unlock(&c->ec_stripe_new_lock);
-
- return ret;
+ guard(mutex)(&c->ec_stripe_new_lock);
+ return list_empty(&c->ec_stripe_new_list);
}
void bch2_fs_ec_flush(struct bch_fs *c)
@@ -2299,41 +2252,40 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
struct ec_stripe_head *h;
struct ec_stripe_new *s;
- mutex_lock(&c->ec_stripe_head_lock);
- list_for_each_entry(h, &c->ec_stripe_head_list, list) {
- prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n",
- h->disk_label, h->algo, h->redundancy,
- bch2_watermarks[h->watermark],
- h->nr_created);
+ scoped_guard(mutex, &c->ec_stripe_head_lock)
+ list_for_each_entry(h, &c->ec_stripe_head_list, list) {
+ prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n",
+ h->disk_label, h->algo, h->redundancy,
+ bch2_watermarks[h->watermark],
+ h->nr_created);
- if (h->s)
- bch2_new_stripe_to_text(out, c, h->s);
- }
- mutex_unlock(&c->ec_stripe_head_lock);
+ if (h->s)
+ bch2_new_stripe_to_text(out, c, h->s);
+ }
prt_printf(out, "in flight:\n");
- mutex_lock(&c->ec_stripe_new_lock);
- list_for_each_entry(s, &c->ec_stripe_new_list, list)
- bch2_new_stripe_to_text(out, c, s);
- mutex_unlock(&c->ec_stripe_new_lock);
+ scoped_guard(mutex, &c->ec_stripe_new_lock)
+ list_for_each_entry(s, &c->ec_stripe_new_list, list)
+ bch2_new_stripe_to_text(out, c, s);
}
void bch2_fs_ec_exit(struct bch_fs *c)
{
- struct ec_stripe_head *h;
- unsigned i;
while (1) {
- mutex_lock(&c->ec_stripe_head_lock);
- h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list);
- mutex_unlock(&c->ec_stripe_head_lock);
+ struct ec_stripe_head *h;
+
+ scoped_guard(mutex, &c->ec_stripe_head_lock)
+ h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list);
if (!h)
break;
if (h->s) {
- for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++)
+ for (unsigned i = 0;
+ i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks;
+ i++)
BUG_ON(h->s->blocks[i]);
kfree(h->s);
@@ -2386,20 +2338,18 @@ static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
return 0;
}
-int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
+int bch2_check_stripe_to_lru_refs(struct btree_trans *trans)
{
struct bkey_buf last_flushed;
-
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)));
+ bch2_check_stripe_to_lru_ref(trans, k, &last_flushed));
- bch2_bkey_buf_exit(&last_flushed, c);
- bch_err_fn(c, ret);
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ bch_err_fn(trans->c, ret);
return ret;
}
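
The ec.c hunks above replace bch2_trans_get()/bch2_trans_put() pairs with CLASS(btree_trans, trans)(c), so error paths return directly instead of unwinding through goto labels (ec_stripe_update_extents() loses its err: label entirely). A minimal sketch of how such a class can be declared with the scope-based cleanup machinery in <linux/cleanup.h>; the real definition lives in the bcachefs btree headers and may differ:

#include <linux/cleanup.h>

/*
 * Sketch only, assuming bcachefs's existing bch2_trans_get()/put()
 * declarations: a pointer class whose constructor acquires the
 * transaction and whose destructor releases it at scope exit.
 */
DEFINE_CLASS(btree_trans, struct btree_trans *,
        bch2_trans_put(_T),             /* destructor, runs at scope exit */
        bch2_trans_get(c),              /* constructor */
        struct bch_fs *c)

With that in place, every return from a function using CLASS(btree_trans, trans)(c) releases the transaction automatically.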
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 548048adf0d5..e807e7027d7a 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -304,6 +304,6 @@ void bch2_fs_ec_exit(struct bch_fs *);
void bch2_fs_ec_init_early(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);
-int bch2_check_stripe_to_lru_refs(struct bch_fs *);
+int bch2_check_stripe_to_lru_refs(struct btree_trans *);
#endif /* _BCACHEFS_EC_H */
diff --git a/fs/bcachefs/enumerated_ref.c b/fs/bcachefs/enumerated_ref.c
index 56ab430f209f..2ded74135977 100644
--- a/fs/bcachefs/enumerated_ref.c
+++ b/fs/bcachefs/enumerated_ref.c
@@ -75,13 +75,11 @@ void enumerated_ref_stop(struct enumerated_ref *ref,
{
enumerated_ref_stop_async(ref);
while (!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) {
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n");
prt_str(&buf, "Outstanding refs:\n");
enumerated_ref_to_text(&buf, ref, names);
printk(KERN_ERR "%s", buf.buf);
- printbuf_exit(&buf);
}
}
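
The "struct printbuf buf = PRINTBUF; ... printbuf_exit(&buf);" pattern is replaced by CLASS(printbuf, buf)() here and throughout the rest of this series. Assuming the class is declared in printbuf.h (the diffstat shows a 4-line change there), it would look roughly like:

/* Hedged sketch; exact definition may differ: */
DEFINE_CLASS(printbuf, struct printbuf,
        printbuf_exit(&_T),     /* frees the buffer at scope exit */
        PRINTBUF,               /* zero-initialized, allocates lazily */
        void)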
diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c
index c39cf304c681..86264b8c343c 100644
--- a/fs/bcachefs/errcode.c
+++ b/fs/bcachefs/errcode.c
@@ -26,7 +26,8 @@ const char *bch2_err_str(int err)
err = abs(err);
- BUG_ON(err >= BCH_ERR_MAX);
+ if (err >= BCH_ERR_MAX)
+ return "(Invalid error)";
if (err >= BCH_ERR_START)
errstr = bch2_errcode_strs[err - BCH_ERR_START];
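
bch2_err_str() now degrades gracefully on out-of-range codes instead of taking the machine down with BUG_ON(). Illustrative only:

#include <linux/printk.h>

static void log_unknown_error(void)
{
        int err = -12345678;    /* abs() is far past BCH_ERR_MAX */

        pr_err("bcachefs: %s\n", bch2_err_str(err));    /* "(Invalid error)" */
}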
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 71649b4164b8..32a286b3a74e 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -42,15 +42,14 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out)
bool bch2_inconsistent_error(struct bch_fs *c)
{
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
+ CLASS(printbuf, buf)();
+ guard(printbuf_atomic)(&buf);
printbuf_indent_add_nextline(&buf, 2);
bool ret = __bch2_inconsistent_error(c, &buf);
if (ret)
bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
return ret;
}
@@ -58,8 +57,8 @@ __printf(3, 0)
static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans,
const char *fmt, va_list args)
{
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
+ CLASS(printbuf, buf)();
+ guard(printbuf_atomic)(&buf);
bch2_log_msg_start(c, &buf);
@@ -70,8 +69,6 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra
bch2_trans_updates_to_text(&buf, trans);
bool ret = __bch2_inconsistent_error(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
-
- printbuf_exit(&buf);
return ret;
}
@@ -109,8 +106,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
{
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
va_list args;
@@ -120,8 +116,6 @@ int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
int ret = __bch2_topology_error(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
-
- printbuf_exit(&buf);
return ret;
}
@@ -138,18 +132,18 @@ void bch2_io_error_work(struct work_struct *work)
/* XXX: if it's reads or checksums that are failing, set it to failed */
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
unsigned long write_errors_start = READ_ONCE(ca->write_errors_start);
if (write_errors_start &&
time_after(jiffies,
write_errors_start + c->opts.write_error_timeout * HZ)) {
if (ca->mi.state >= BCH_MEMBER_STATE_ro)
- goto out;
+ return;
bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
BCH_FORCE_IF_DEGRADED);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
__bch2_log_msg_start(ca->name, &buf);
prt_printf(&buf, "writes erroring for %u seconds, setting %s ro",
@@ -159,10 +153,7 @@ void bch2_io_error_work(struct work_struct *work)
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
-out:
- up_write(&c->state_lock);
}
void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
@@ -382,11 +373,10 @@ bool __bch2_count_fsck_err(struct bch_fs *c,
{
bch2_sb_error_count(c, id);
- mutex_lock(&c->fsck_error_msgs_lock);
bool print = true, repeat = false, suppress = false;
- count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress);
- mutex_unlock(&c->fsck_error_msgs_lock);
+ scoped_guard(mutex, &c->fsck_error_msgs_lock)
+ count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress);
if (suppress)
prt_printf(msg, "Ratelimiting new instances of previous error\n");
@@ -444,7 +434,8 @@ int __bch2_fsck_err(struct bch_fs *c,
const char *fmt, ...)
{
va_list args;
- struct printbuf buf = PRINTBUF, *out = &buf;
+ CLASS(printbuf, buf)();
+ struct printbuf *out = &buf;
int ret = 0;
const char *action_orig = "fix?", *action = action_orig;
@@ -637,7 +628,9 @@ err:
* log_fsck_err()s: that would require us to track for every error type
* which recovery pass corrects it, to get the fsck exit status correct:
*/
- if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ /* nothing */
+ } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
set_bit(BCH_FS_errors_fixed, &c->flags);
} else {
set_bit(BCH_FS_errors_not_fixed, &c->flags);
@@ -646,7 +639,6 @@ err:
if (action != action_orig)
kfree(action);
- printbuf_exit(&buf);
BUG_ON(!ret);
return ret;
@@ -678,7 +670,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
fsck_flags |= fsck_flags_extra[err];
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "invalid bkey in %s",
bch2_bkey_validate_contexts[from.from]);
@@ -699,7 +691,6 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
va_end(args);
int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf);
- printbuf_exit(&buf);
return ret;
}
@@ -707,7 +698,7 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
{
struct fsck_err_state *s, *n;
- mutex_lock(&c->fsck_error_msgs_lock);
+ guard(mutex)(&c->fsck_error_msgs_lock);
list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
if (print && s->ratelimited && s->last_msg)
@@ -717,8 +708,6 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
kfree(s->last_msg);
kfree(s);
}
-
- mutex_unlock(&c->fsck_error_msgs_lock);
}
void bch2_flush_fsck_errs(struct bch_fs *c)
@@ -752,7 +741,8 @@ int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *o
void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
subvol_inum inum, u64 offset)
{
- bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
+ CLASS(btree_trans, trans)(c);
+ lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
}
int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
@@ -769,5 +759,6 @@ int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printb
void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out,
struct bpos pos)
{
- bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
+ CLASS(btree_trans, trans)(c);
+ lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
}
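
bch2_io_error_work() above trades down_write()/up_write() plus a goto-out label for guard(rwsem_write), which <linux/rwsem.h> provides via DEFINE_GUARD(). A minimal sketch of the resulting control flow, with illustrative names:

#include <linux/cleanup.h>
#include <linux/rwsem.h>

static void update_state(struct rw_semaphore *state_lock, bool already_ro)
{
        guard(rwsem_write)(state_lock); /* up_write() on every return path */

        if (already_ro)
                return;                 /* replaces "goto out" + unlock */

        /* ... flip the device read-only, log, etc. ... */
}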
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 83cbd77dcb9c..b36ecfc0ab9d 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -63,15 +63,14 @@ void bch2_io_failures_to_text(struct printbuf *out,
((!!f->failed_ec) << 3);
bch2_printbuf_make_room(out, 1024);
- out->atomic++;
scoped_guard(rcu) {
+ guard(printbuf_atomic)(out);
struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev);
if (ca)
prt_str(out, ca->name);
else
prt_printf(out, "(invalid device %u)", f->dev);
}
- --out->atomic;
prt_char(out, ' ');
@@ -1023,6 +1022,18 @@ bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
return false;
}
+bool bch2_bkey_in_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ guard(rcu)();
+ bkey_for_each_ptr(ptrs, ptr)
+ if (!bch2_dev_in_target(c, ptr->dev, target))
+ return false;
+
+ return true;
+}
+
bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
struct bch_extent_ptr m, u64 offset)
{
@@ -1225,7 +1236,7 @@ restart_drop_ptrs:
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
{
- out->atomic++;
+ guard(printbuf_atomic)(out);
guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (!ca) {
@@ -1250,7 +1261,6 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
else if (stale)
prt_printf(out, " invalid");
}
- --out->atomic;
}
void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
@@ -1512,7 +1522,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
const struct bch_extent_rebalance *r = &entry->rebalance;
if (!bch2_compression_opt_valid(r->compression)) {
- struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
+ union bch_compression_opt opt = { .value = r->compression };
prt_printf(err, "invalid compression opt %u:%u",
opt.type, opt.level);
return bch_err_throw(c, invalid_bkey);
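
Two notes on the extents.c hunks: the new bch2_bkey_in_target() returns true only when every pointer lives in the given target, unlike bch2_bkey_has_target()'s any-pointer test; and the manual out->atomic++/-- bracketing becomes guard(printbuf_atomic), presumably declared in printbuf.h along these lines (hedged; the exact definition may differ):

/* Sketch: nonzero ->atomic makes printbuf allocations non-blocking */
DEFINE_GUARD(printbuf_atomic, struct printbuf *,
        _T->atomic++,
        --_T->atomic)

The decode of rebalance compression options also switches from __bch2_compression_decode() to reading bitfields through union bch_compression_opt, matching the compress.h changes in the diffstat.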
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index b8590e51b76e..f212f91c278d 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -615,6 +615,7 @@ static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsig
}
bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
+bool bch2_bkey_in_target(struct bch_fs *, struct bkey_s_c, unsigned);
void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
diff --git a/fs/bcachefs/fast_list.c b/fs/bcachefs/fast_list.c
index b52f264318d8..6be2a45be1dd 100644
--- a/fs/bcachefs/fast_list.c
+++ b/fs/bcachefs/fast_list.c
@@ -138,8 +138,21 @@ void fast_list_remove(struct fast_list *l, unsigned idx)
void fast_list_exit(struct fast_list *l)
{
- /* XXX: warn if list isn't empty */
- free_percpu(l->buffer);
+ if (l->buffer) {
+ int cpu;
+ for_each_possible_cpu(cpu) {
+ struct fast_list_pcpu *lp = per_cpu_ptr(l->buffer, cpu);
+
+ while (lp->nr)
+ ida_free(&l->slots_allocated, lp->entries[--lp->nr]);
+ }
+
+ free_percpu(l->buffer);
+ }
+
+ WARN(ida_find_first(&l->slots_allocated) >= 0,
+ "fast_list still has objects on exit\n");
+
ida_destroy(&l->slots_allocated);
genradix_free(&l->items);
}
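
The old fast_list_exit() freed the per-cpu buffers while they could still hold cached slot indices, so those IDA entries were never returned and the "warn if list isn't empty" TODO was unimplementable. The new code drains each CPU's cache back into the IDA first, then WARNs via ida_find_first() only if real objects remain. The drain pattern in isolation, with reduced hypothetical types:

#include <linux/idr.h>
#include <linux/percpu.h>

struct pcpu_cache { unsigned nr; unsigned entries[8]; }; /* illustrative */

static void drain_cached_slots(struct pcpu_cache __percpu *cache,
                               struct ida *ida)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                struct pcpu_cache *lp = per_cpu_ptr(cache, cpu);

                while (lp->nr)  /* give cached, unused slots back */
                        ida_free(ida, lp->entries[--lp->nr]);
        }
}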
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 4e82dfa6c03f..f2389054693a 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -254,12 +254,11 @@ err:
bch2_trans_iter_exit(trans, &iter);
if (ret) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9));
prt_printf(&buf, "read error %s from btree lookup", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
rbio->bio.bi_status = BLK_STS_IOERR;
bio_endio(&rbio->bio);
@@ -425,27 +424,23 @@ static void bch2_writepage_io_done(struct bch_write_op *op)
set_bit(EI_INODE_ERROR, &io->inode->ei_flags);
bio_for_each_folio_all(fi, bio) {
- struct bch_folio *s;
-
mapping_set_error(fi.folio->mapping, -EIO);
- s = __bch2_folio(fi.folio);
- spin_lock(&s->lock);
+ struct bch_folio *s = __bch2_folio(fi.folio);
+ guard(spinlock)(&s->lock);
+
for (i = 0; i < folio_sectors(fi.folio); i++)
s->s[i].nr_replicas = 0;
- spin_unlock(&s->lock);
}
}
if (io->op.flags & BCH_WRITE_wrote_data_inline) {
bio_for_each_folio_all(fi, bio) {
- struct bch_folio *s;
+ struct bch_folio *s = __bch2_folio(fi.folio);
+ guard(spinlock)(&s->lock);
- s = __bch2_folio(fi.folio);
- spin_lock(&s->lock);
for (i = 0; i < folio_sectors(fi.folio); i++)
s->s[i].nr_replicas = 0;
- spin_unlock(&s->lock);
}
}
@@ -571,30 +566,30 @@ do_io:
BUG_ON(ret);
/* Before unlocking the page, get copy of reservations: */
- spin_lock(&s->lock);
- memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors);
+ scoped_guard(spinlock, &s->lock) {
+ memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors);
- for (i = 0; i < f_sectors; i++) {
- if (s->s[i].state < SECTOR_dirty)
- continue;
+ for (i = 0; i < f_sectors; i++) {
+ if (s->s[i].state < SECTOR_dirty)
+ continue;
- nr_replicas_this_write =
- min_t(unsigned, nr_replicas_this_write,
- s->s[i].nr_replicas +
- s->s[i].replicas_reserved);
- }
+ nr_replicas_this_write =
+ min_t(unsigned, nr_replicas_this_write,
+ s->s[i].nr_replicas +
+ s->s[i].replicas_reserved);
+ }
- for (i = 0; i < f_sectors; i++) {
- if (s->s[i].state < SECTOR_dirty)
- continue;
+ for (i = 0; i < f_sectors; i++) {
+ if (s->s[i].state < SECTOR_dirty)
+ continue;
- s->s[i].nr_replicas = w->opts.compression
- ? 0 : nr_replicas_this_write;
+ s->s[i].nr_replicas = w->opts.compression
+ ? 0 : nr_replicas_this_write;
- s->s[i].replicas_reserved = 0;
- bch2_folio_sector_set(folio, s, i, SECTOR_allocated);
+ s->s[i].replicas_reserved = 0;
+ bch2_folio_sector_set(folio, s, i, SECTOR_allocated);
+ }
}
- spin_unlock(&s->lock);
BUG_ON(atomic_read(&s->write_count));
atomic_set(&s->write_count, 1);
@@ -780,10 +775,9 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
copied = 0;
}
- spin_lock(&inode->v.i_lock);
- if (pos + copied > inode->v.i_size)
- i_size_write(&inode->v, pos + copied);
- spin_unlock(&inode->v.i_lock);
+ scoped_guard(spinlock, &inode->v.i_lock)
+ if (pos + copied > inode->v.i_size)
+ i_size_write(&inode->v, pos + copied);
if (copied) {
if (!folio_test_uptodate(folio))
@@ -942,10 +936,9 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
end = pos + copied;
- spin_lock(&inode->v.i_lock);
- if (end > inode->v.i_size)
- i_size_write(&inode->v, end);
- spin_unlock(&inode->v.i_lock);
+ scoped_guard(spinlock, &inode->v.i_lock)
+ if (end > inode->v.i_size)
+ i_size_write(&inode->v, end);
f_pos = pos;
f_offset = pos - folio_pos(darray_first(fs));
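
The i_size updates above now use scoped_guard(spinlock, ...), which confines the lock to a single statement or block rather than the whole function scope. The pattern in isolation, as an illustrative helper:

#include <linux/cleanup.h>
#include <linux/fs.h>
#include <linux/spinlock.h>

static void maybe_extend_isize(struct inode *inode, loff_t new_size)
{
        scoped_guard(spinlock, &inode->i_lock)
                if (new_size > inode->i_size)
                        i_size_write(inode, new_size);
        /* i_lock is dropped here */
}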
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 1f5154d9676b..73d44875faf2 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -252,7 +252,7 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum,
u64 offset, u64 size,
unsigned nr_replicas, bool compressed)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bkey_s_c k;
u64 end = offset + size;
@@ -285,7 +285,6 @@ retry:
err:
if (bch2_err_matches(err, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_put(trans);
return err ? false : ret;
}
@@ -428,17 +427,15 @@ static __always_inline void bch2_dio_write_end(struct dio_write *dio)
dio->written += dio->op.written;
if (dio->extending) {
- spin_lock(&inode->v.i_lock);
+ guard(spinlock)(&inode->v.i_lock);
if (req->ki_pos > inode->v.i_size)
i_size_write(&inode->v, req->ki_pos);
- spin_unlock(&inode->v.i_lock);
}
if (dio->op.i_sectors_delta || dio->quota_res.sectors) {
- mutex_lock(&inode->ei_quota_lock);
+ guard(mutex)(&inode->ei_quota_lock);
__bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta);
__bch2_quota_reservation_put(c, inode, &dio->quota_res);
- mutex_unlock(&inode->ei_quota_lock);
}
bio_release_pages(bio, false);
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index c2cc405822f2..2a6705186c44 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -125,11 +125,9 @@ folio_sector_reserve(enum bch_folio_sector_state state)
/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
- struct bch_folio *s;
-
- s = kzalloc(sizeof(*s) +
- sizeof(struct bch_folio_sector) *
- folio_sectors(folio), gfp);
+ struct bch_folio *s = kzalloc(sizeof(*s) +
+ sizeof(struct bch_folio_sector) *
+ folio_sectors(folio), gfp);
if (!s)
return NULL;
@@ -162,7 +160,7 @@ static void __bch2_folio_set(struct folio *folio,
BUG_ON(pg_offset >= sectors);
BUG_ON(pg_offset + pg_len > sectors);
- spin_lock(&s->lock);
+ guard(spinlock)(&s->lock);
for (i = pg_offset; i < pg_offset + pg_len; i++) {
s->s[i].nr_replicas = nr_ptrs;
@@ -171,8 +169,6 @@ static void __bch2_folio_set(struct folio *folio,
if (i == sectors)
s->uptodate = true;
-
- spin_unlock(&s->lock);
}
/*
@@ -276,10 +272,9 @@ void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
s = bch2_folio(folio);
if (s) {
- spin_lock(&s->lock);
+ guard(spinlock)(&s->lock);
for (j = folio_offset; j < folio_offset + folio_len; j++)
s->s[j].nr_replicas = 0;
- spin_unlock(&s->lock);
}
folio_unlock(folio);
@@ -330,13 +325,12 @@ int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
unsigned folio_offset = max(*start, folio_start) - folio_start;
unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
- spin_lock(&s->lock);
+ guard(spinlock)(&s->lock);
for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
i_sectors_delta -= s->s[j].state == SECTOR_dirty;
bch2_folio_sector_set(folio, s, j,
folio_sector_reserve(s->s[j].state));
}
- spin_unlock(&s->lock);
}
folio_unlock(folio);
@@ -529,29 +523,26 @@ void bch2_set_folio_dirty(struct bch_fs *c,
BUG_ON(!s->uptodate);
- spin_lock(&s->lock);
-
- for (i = round_down(offset, block_bytes(c)) >> 9;
- i < round_up(offset + len, block_bytes(c)) >> 9;
- i++) {
- unsigned sectors = sectors_to_reserve(&s->s[i],
- res->disk.nr_replicas);
-
- /*
- * This can happen if we race with the error path in
- * bch2_writepage_io_done():
- */
- sectors = min_t(unsigned, sectors, res->disk.sectors);
+ scoped_guard(spinlock, &s->lock)
+ for (i = round_down(offset, block_bytes(c)) >> 9;
+ i < round_up(offset + len, block_bytes(c)) >> 9;
+ i++) {
+ unsigned sectors = sectors_to_reserve(&s->s[i],
+ res->disk.nr_replicas);
- s->s[i].replicas_reserved += sectors;
- res->disk.sectors -= sectors;
+ /*
+ * This can happen if we race with the error path in
+ * bch2_writepage_io_done():
+ */
+ sectors = min_t(unsigned, sectors, res->disk.sectors);
- dirty_sectors += s->s[i].state == SECTOR_unallocated;
+ s->s[i].replicas_reserved += sectors;
+ res->disk.sectors -= sectors;
- bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
- }
+ dirty_sectors += s->s[i].state == SECTOR_unallocated;
- spin_unlock(&s->lock);
+ bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
+ }
bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index dc5f713e209c..93ad33f0953a 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -148,7 +148,7 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
struct quota_res *quota_res, s64 sectors)
{
if (unlikely((s64) inode->v.i_blocks + sectors < 0)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
@@ -157,7 +157,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
if (sectors < 0)
sectors = -inode->v.i_blocks;
@@ -187,7 +186,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum,
u64 *seq)
{
- struct printbuf buf = PRINTBUF;
struct bch_inode_unpacked u;
struct btree_iter iter;
int ret = bch2_inode_peek(trans, &iter, &u, inum, 0);
@@ -197,6 +195,7 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in
u64 cur_seq = journal_cur_seq(&trans->c->journal);
*seq = min(cur_seq, u.bi_journal_seq);
+ CLASS(printbuf, buf)();
if (fsck_err_on(u.bi_journal_seq > cur_seq,
trans, inode_journal_seq_in_future,
"inode journal seq in future (currently at %llu)\n%s",
@@ -208,7 +207,6 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in
}
fsck_err:
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
return ret;
}
@@ -227,7 +225,7 @@ static int bch2_flush_inode(struct bch_fs *c,
u64 seq;
int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
- bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?:
+ bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?:
bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?:
bch2_inode_flush_nocow_writes(c, inode);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_fsync);
@@ -267,11 +265,11 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol,
struct bpos start,
struct bpos end)
{
- return bch2_trans_run(c,
- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end,
subvol, 0, k, ({
- bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k);
- })));
+ bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k);
+ }));
}
static int __bch2_truncate_folio(struct bch_inode_info *inode,
@@ -521,7 +519,7 @@ int bchfs_truncate(struct mnt_idmap *idmap,
if (unlikely(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal))) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
@@ -531,7 +529,6 @@ int bchfs_truncate(struct mnt_idmap *idmap,
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
ret = bch2_setattr_nonsize(idmap, inode, iattr);
@@ -559,11 +556,10 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l
u64 block_start = round_up(offset, block_bytes(c));
u64 block_end = round_down(end, block_bytes(c));
bool truncated_last_page;
- int ret = 0;
- ret = bch2_truncate_folios(inode, offset, end);
+ int ret = bch2_truncate_folios(inode, offset, end);
if (unlikely(ret < 0))
- goto err;
+ return ret;
truncated_last_page = ret;
@@ -576,19 +572,18 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l
block_start >> 9, block_end >> 9,
&i_sectors_delta);
bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+
+ if (ret)
+ return ret;
}
- mutex_lock(&inode->ei_update_lock);
- if (end >= inode->v.i_size && !truncated_last_page) {
- ret = bch2_write_inode_size(c, inode, inode->v.i_size,
- ATTR_MTIME|ATTR_CTIME);
- } else {
- ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
+ guard(mutex)(&inode->ei_update_lock);
+ if (end >= inode->v.i_size && !truncated_last_page)
+ return bch2_write_inode_size(c, inode, inode->v.i_size,
+ ATTR_MTIME|ATTR_CTIME);
+ else
+ return bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME);
- }
- mutex_unlock(&inode->ei_update_lock);
-err:
- return ret;
}
static noinline long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
@@ -631,7 +626,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
u64 start_sector, u64 end_sector)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bpos end_pos = POS(inode->v.i_ino, end_sector);
struct bch_io_opts opts;
@@ -753,7 +748,6 @@ bkey_err:
}
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return ret;
}
@@ -802,13 +796,11 @@ static noinline long bchfs_fallocate(struct bch_inode_info *inode, int mode,
if (end >= inode->v.i_size &&
(((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) ||
!(mode & FALLOC_FL_KEEP_SIZE))) {
- spin_lock(&inode->v.i_lock);
- i_size_write(&inode->v, end);
- spin_unlock(&inode->v.i_lock);
+ scoped_guard(spinlock, &inode->v.i_lock)
+ i_size_write(&inode->v, end);
- mutex_lock(&inode->ei_update_lock);
- ret2 = bch2_write_inode_size(c, inode, end, 0);
- mutex_unlock(&inode->ei_update_lock);
+ scoped_guard(mutex, &inode->ei_update_lock)
+ ret2 = bch2_write_inode_size(c, inode, end, 0);
}
return ret ?: ret2;
@@ -861,8 +853,8 @@ static int quota_reserve_range(struct bch_inode_info *inode,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
u64 sectors = end - start;
- int ret = bch2_trans_run(c,
- for_each_btree_key_in_subvolume_max(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_in_subvolume_max(trans, iter,
BTREE_ID_extents,
POS(inode->v.i_ino, start),
POS(inode->v.i_ino, end - 1),
@@ -875,7 +867,7 @@ static int quota_reserve_range(struct bch_inode_info *inode,
}
0;
- })));
+ }));
return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
}
@@ -955,10 +947,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
bch2_i_sectors_acct(c, dst, &quota_res, i_sectors_delta);
- spin_lock(&dst->v.i_lock);
- if (pos_dst + ret > dst->v.i_size)
- i_size_write(&dst->v, pos_dst + ret);
- spin_unlock(&dst->v.i_lock);
+ scoped_guard(spinlock, &dst->v.i_lock)
+ if (pos_dst + ret > dst->v.i_size)
+ i_size_write(&dst->v, pos_dst + ret);
if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
IS_SYNC(file_inode(file_dst)))
@@ -1020,38 +1011,38 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
- int ret = bch2_trans_run(c,
- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents,
POS(inode->v.i_ino, offset >> 9),
POS(inode->v.i_ino, U64_MAX),
inum.subvol, BTREE_ITER_slots, k, ({
- if (k.k->p.inode != inode->v.i_ino ||
- !bkey_extent_is_data(k.k)) {
- loff_t start_offset = k.k->p.inode == inode->v.i_ino
- ? max(offset, bkey_start_offset(k.k) << 9)
- : offset;
- loff_t end_offset = k.k->p.inode == inode->v.i_ino
- ? MAX_LFS_FILESIZE
- : k.k->p.offset << 9;
-
- /*
- * Found a hole in the btree, now make sure it's
- * a hole in the pagecache. We might have to
- * keep searching if this hole is entirely dirty
- * in the page cache:
- */
- bch2_trans_unlock(trans);
- loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v,
- start_offset, end_offset, 0, false);
- if (pagecache_hole < end_offset) {
- next_hole = pagecache_hole;
- break;
- }
- } else {
- offset = max(offset, bkey_start_offset(k.k) << 9);
+ if (k.k->p.inode != inode->v.i_ino ||
+ !bkey_extent_is_data(k.k)) {
+ loff_t start_offset = k.k->p.inode == inode->v.i_ino
+ ? max(offset, bkey_start_offset(k.k) << 9)
+ : offset;
+ loff_t end_offset = k.k->p.inode == inode->v.i_ino
+ ? MAX_LFS_FILESIZE
+ : k.k->p.offset << 9;
+
+ /*
+ * Found a hole in the btree, now make sure it's
+ * a hole in the pagecache. We might have to
+ * keep searching if this hole is entirely dirty
+ * in the page cache:
+ */
+ bch2_trans_unlock(trans);
+ loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v,
+ start_offset, end_offset, 0, false);
+ if (pagecache_hole < end_offset) {
+ next_hole = pagecache_hole;
+ break;
}
- 0;
- })));
+ } else {
+ offset = max(offset, bkey_start_offset(k.k) << 9);
+ }
+ 0;
+ }));
if (ret)
return ret;
diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
index ca70346e68dc..d229f7225da1 100644
--- a/fs/bcachefs/fs-io.h
+++ b/fs/bcachefs/fs-io.h
@@ -77,9 +77,8 @@ static inline void bch2_quota_reservation_put(struct bch_fs *c,
struct quota_res *res)
{
if (res->sectors) {
- mutex_lock(&inode->ei_quota_lock);
+ guard(mutex)(&inode->ei_quota_lock);
__bch2_quota_reservation_put(c, inode, res);
- mutex_unlock(&inode->ei_quota_lock);
}
}
@@ -94,16 +93,15 @@ static inline int bch2_quota_reservation_add(struct bch_fs *c,
if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags))
return 0;
- mutex_lock(&inode->ei_quota_lock);
+ guard(mutex)(&inode->ei_quota_lock);
ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK);
- if (likely(!ret)) {
- inode->ei_quota_reserved += sectors;
- res->sectors += sectors;
- }
- mutex_unlock(&inode->ei_quota_lock);
+ if (ret)
+ return ret;
- return ret;
+ inode->ei_quota_reserved += sectors;
+ res->sectors += sectors;
+ return 0;
}
#else
@@ -134,9 +132,8 @@ static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *
struct quota_res *quota_res, s64 sectors)
{
if (sectors) {
- mutex_lock(&inode->ei_quota_lock);
+ guard(mutex)(&inode->ei_quota_lock);
__bch2_i_sectors_acct(c, inode, quota_res, sectors);
- mutex_unlock(&inode->ei_quota_lock);
}
}
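
guard(mutex) in these inline helpers is what lets bch2_quota_reservation_add() return early on error with no unlock call. The generic shape of the early-return pattern, with illustrative names:

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/mutex.h>

static int reserve_sectors(struct mutex *lock, long *reserved,
                           long sectors, long limit)
{
        guard(mutex)(lock);             /* released on every return */

        if (*reserved + sectors > limit)
                return -ENOSPC;

        *reserved += sectors;
        return 0;
}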
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 4e72e654da96..8b9d3c7d1f57 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -111,9 +111,8 @@ static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label)
BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX);
- mutex_lock(&c->sb_lock);
- memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock)
+ memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
len = strnlen(label, BCH_SB_LABEL_SIZE);
if (len == BCH_SB_LABEL_SIZE) {
@@ -152,10 +151,10 @@ static int bch2_ioc_setlabel(struct bch_fs *c,
if (ret)
return ret;
- mutex_lock(&c->sb_lock);
- strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE);
- ret = bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock) {
+ strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE);
+ ret = bch2_write_super(c);
+ }
mnt_drop_write_file(file);
return ret;
@@ -172,7 +171,7 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
if (get_user(flags, arg))
return -EFAULT;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "shutdown by ioctl type %u", flags);
@@ -193,13 +192,10 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
bch2_fs_emergency_read_only2(c, &buf);
break;
default:
- ret = -EINVAL;
- goto noprint;
+ return -EINVAL;
}
bch2_print_str(c, KERN_ERR, buf.buf);
-noprint:
- printbuf_exit(&buf);
return ret;
}
@@ -234,9 +230,8 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) {
/* sync_inodes_sb enforce s_umount is locked */
- down_read(&c->vfs_sb->s_umount);
+ guard(rwsem_read)(&c->vfs_sb->s_umount);
sync_inodes_sb(c->vfs_sb);
- up_read(&c->vfs_sb->s_umount);
}
if (arg.src_ptr) {
@@ -301,12 +296,10 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
!arg.src_ptr)
snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
- down_write(&c->snapshot_create_lock);
- inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
- dst_dentry, arg.mode|S_IFDIR,
- 0, snapshot_src, create_flags);
- up_write(&c->snapshot_create_lock);
-
+ scoped_guard(rwsem_write, &c->snapshot_create_lock)
+ inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
+ dst_dentry, arg.mode|S_IFDIR,
+ 0, snapshot_src, create_flags);
error = PTR_ERR_OR_ZERO(inode);
if (error)
goto err3;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3b0783f117ae..2789b30add10 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -106,14 +106,13 @@ int __must_check bch2_write_inode(struct bch_fs *c,
inode_set_fn set,
void *p, unsigned fields)
{
- struct btree_trans *trans = bch2_trans_get(c);
- struct btree_iter iter = {};
- struct bch_inode_unpacked inode_u;
- int ret;
+ CLASS(btree_trans, trans)(c);
retry:
bch2_trans_begin(trans);
- ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent);
+ struct btree_iter iter = {};
+ struct bch_inode_unpacked inode_u;
+ int ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent);
if (ret)
goto err;
@@ -156,7 +155,6 @@ err:
inode_inum(inode).subvol,
inode_inum(inode).inum);
- bch2_trans_put(trans);
return ret < 0 ? ret : 0;
}
@@ -166,32 +164,27 @@ int bch2_fs_quota_transfer(struct bch_fs *c,
unsigned qtypes,
enum quota_acct_mode mode)
{
- unsigned i;
- int ret;
-
qtypes &= enabled_qtypes(c);
- for (i = 0; i < QTYP_NR; i++)
+ for (unsigned i = 0; i < QTYP_NR; i++)
if (new_qid.q[i] == inode->ei_qid.q[i])
qtypes &= ~(1U << i);
if (!qtypes)
return 0;
- mutex_lock(&inode->ei_quota_lock);
+ guard(mutex)(&inode->ei_quota_lock);
- ret = bch2_quota_transfer(c, qtypes, new_qid,
+ int ret = bch2_quota_transfer(c, qtypes, new_qid,
inode->ei_qid,
inode->v.i_blocks +
inode->ei_quota_reserved,
mode);
if (!ret)
- for (i = 0; i < QTYP_NR; i++)
+ for (unsigned i = 0; i < QTYP_NR; i++)
if (qtypes & (1 << i))
inode->ei_qid.q[i] = new_qid.q[i];
- mutex_unlock(&inode->ei_quota_lock);
-
return ret;
}
@@ -241,7 +234,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
struct bch_fs *c = trans->c;
struct rhltable *ht = &c->vfs_inodes_by_inum_table;
u64 inum = p.offset;
- DARRAY(u32) subvols;
+ CLASS(darray_u32, subvols)();
int ret = 0;
if (!test_bit(BCH_FS_started, &c->flags))
@@ -280,7 +273,7 @@ restart:
rcu_read_unlock();
ret = darray_make_room(&subvols, 1);
if (ret)
- goto err;
+ return ret;
subvols.nr = 0;
goto restart_from_top;
}
@@ -303,14 +296,13 @@ restart:
u32 snap;
ret = bch2_subvolume_get_snapshot(trans, *i, &snap);
if (ret)
- goto err;
+ return ret;
ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot);
if (ret)
break;
}
-err:
- darray_exit(&subvols);
+
return ret;
}
@@ -367,9 +359,9 @@ repeat:
static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode)
{
- spin_lock(&inode->v.i_lock);
- bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags);
- spin_unlock(&inode->v.i_lock);
+ bool remove;
+ scoped_guard(spinlock, &inode->v.i_lock)
+ remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags);
if (remove) {
int ret = rhltable_remove(&c->vfs_inodes_by_inum_table,
@@ -430,9 +422,8 @@ retry:
inode_sb_list_add(&inode->v);
- mutex_lock(&c->vfs_inodes_lock);
- list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
- mutex_unlock(&c->vfs_inodes_lock);
+ scoped_guard(mutex, &c->vfs_inodes_lock)
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
return inode;
}
}
@@ -514,7 +505,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
if (inode)
return &inode->v;
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bch_inode_unpacked inode_u;
struct bch_subvolume subvol;
@@ -522,7 +513,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
- bch2_trans_put(trans);
return ret ? ERR_PTR(ret) : &inode->v;
}
@@ -534,7 +524,6 @@ __bch2_create(struct mnt_idmap *idmap,
unsigned flags)
{
struct bch_fs *c = dir->v.i_sb->s_fs_info;
- struct btree_trans *trans;
struct bch_inode_unpacked dir_u;
struct bch_inode_info *inode;
struct bch_inode_unpacked inode_u;
@@ -555,18 +544,23 @@ __bch2_create(struct mnt_idmap *idmap,
if (ret)
return ERR_PTR(ret);
#endif
+
inode = __bch2_new_inode(c, GFP_NOFS);
if (unlikely(!inode)) {
- inode = ERR_PTR(-ENOMEM);
- goto err;
+ posix_acl_release(default_acl);
+ posix_acl_release(acl);
+ return ERR_PTR(-ENOMEM);
}
bch2_inode_init_early(c, &inode_u);
if (!(flags & BCH_CREATE_TMPFILE))
mutex_lock(&dir->ei_update_lock);
-
- trans = bch2_trans_get(c);
+ /*
+ * posix_acl_create() calls get_acl -> btree transaction; don't start
+ * ours until after. ei_update_lock must also be taken first:
+ */
+ CLASS(btree_trans, trans)(c);
retry:
bch2_trans_begin(trans);
@@ -625,7 +619,6 @@ err_before_quota:
* restart here.
*/
inode = bch2_inode_hash_insert(c, NULL, inode);
- bch2_trans_put(trans);
err:
posix_acl_release(default_acl);
posix_acl_release(acl);
@@ -634,7 +627,6 @@ err_trans:
if (!(flags & BCH_CREATE_TMPFILE))
mutex_unlock(&dir->ei_update_lock);
- bch2_trans_put(trans);
make_bad_inode(&inode->v);
iput(&inode->v);
inode = ERR_PTR(ret);
@@ -649,7 +641,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
subvol_inum inum = {};
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct qstr lookup_name;
int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name);
@@ -701,7 +693,6 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
goto err;
out:
bch2_trans_iter_exit(trans, &dirent_iter);
- printbuf_exit(&buf);
return inode;
err:
inode = ERR_PTR(ret);
@@ -770,8 +761,8 @@ static int __bch2_link(struct bch_fs *c,
struct bch_inode_unpacked dir_u, inode_u;
int ret;
- mutex_lock(&inode->ei_update_lock);
- struct btree_trans *trans = bch2_trans_get(c);
+ guard(mutex)(&inode->ei_update_lock);
+ CLASS(btree_trans, trans)(c);
ret = commit_do(trans, NULL, NULL, 0,
bch2_link_trans(trans,
@@ -785,8 +776,6 @@ static int __bch2_link(struct bch_fs *c,
bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
}
- bch2_trans_put(trans);
- mutex_unlock(&inode->ei_update_lock);
return ret;
}
@@ -821,8 +810,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
int ret;
bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
-
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc,
@@ -849,7 +837,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
if (IS_CASEFOLDED(vdir))
d_invalidate(dentry);
err:
- bch2_trans_put(trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
return ret;
@@ -918,7 +905,6 @@ static int bch2_rename2(struct mnt_idmap *idmap,
struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
struct bch_inode_unpacked dst_dir_u, src_dir_u;
struct bch_inode_unpacked src_inode_u, dst_inode_u, *whiteout_inode_u;
- struct btree_trans *trans;
enum bch_rename_mode mode = flags & RENAME_EXCHANGE
? BCH_RENAME_EXCHANGE
: dst_dentry->d_inode
@@ -942,7 +928,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
src_inode,
dst_inode);
- trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?:
bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol);
@@ -1028,8 +1014,6 @@ err_tx_restart:
bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
ATTR_CTIME);
err:
- bch2_trans_put(trans);
-
bch2_fs_quota_transfer(c, src_inode,
bch_qid(&src_inode->ei_inode),
1 << QTYP_PRJ,
@@ -1097,7 +1081,6 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_qid qid;
- struct btree_trans *trans;
struct btree_iter inode_iter = {};
struct bch_inode_unpacked inode_u;
struct posix_acl *acl = NULL;
@@ -1105,7 +1088,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
kgid_t kgid;
int ret;
- mutex_lock(&inode->ei_update_lock);
+ guard(mutex)(&inode->ei_update_lock);
qid = inode->ei_qid;
@@ -1122,9 +1105,9 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
KEY_TYPE_QUOTA_PREALLOC);
if (ret)
- goto err;
+ return ret;
- trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
retry:
bch2_trans_begin(trans);
kfree(acl);
@@ -1153,18 +1136,13 @@ btree_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
- goto err_trans;
+ return ret;
bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
if (acl)
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
-err_trans:
- bch2_trans_put(trans);
-err:
- mutex_unlock(&inode->ei_update_lock);
-
- return bch2_err_class(ret);
+ return 0;
}
static int bch2_getattr(struct mnt_idmap *idmap,
@@ -1228,18 +1206,16 @@ static int bch2_setattr(struct mnt_idmap *idmap,
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- int ret;
lockdep_assert_held(&inode->v.i_rwsem);
- ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
- setattr_prepare(idmap, dentry, iattr);
- if (ret)
- return ret;
+ int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
+ setattr_prepare(idmap, dentry, iattr) ?:
+ (iattr->ia_valid & ATTR_SIZE
+ ? bchfs_truncate(idmap, inode, iattr)
+ : bch2_setattr_nonsize(idmap, inode, iattr));
- return iattr->ia_valid & ATTR_SIZE
- ? bchfs_truncate(idmap, inode, iattr)
- : bch2_setattr_nonsize(idmap, inode, iattr);
+ return bch2_err_class(ret);
}
static int bch2_tmpfile(struct mnt_idmap *idmap,
@@ -1487,7 +1463,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
{
struct bch_fs *c = vinode->i_sb->s_fs_info;
struct bch_inode_info *ei = to_bch_ei(vinode);
- struct btree_trans *trans;
struct bch_fiemap_extent cur, prev;
int ret = 0;
@@ -1505,7 +1480,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_bkey_buf_init(&prev.kbuf);
bkey_init(&prev.kbuf.k->k);
- trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
while (start < end) {
ret = lockrestart_do(trans,
@@ -1538,7 +1513,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
ret = bch2_fill_extent(c, info, &prev);
}
err:
- bch2_trans_put(trans);
bch2_bkey_buf_exit(&cur.kbuf, c);
bch2_bkey_buf_exit(&prev.kbuf, c);
@@ -1968,7 +1942,6 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
struct bch_inode_info *inode = to_bch_ei(child->d_inode);
struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans *trans;
struct btree_iter iter1;
struct btree_iter iter2;
struct bkey_s_c k;
@@ -1983,8 +1956,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
if (!S_ISDIR(dir->v.i_mode))
return -EINVAL;
- trans = bch2_trans_get(c);
-
+ CLASS(btree_trans, trans)(c);
bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
POS(dir->ei_inode.bi_inum, 0), 0);
bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
@@ -2063,8 +2035,6 @@ err:
bch2_trans_iter_exit(trans, &iter1);
bch2_trans_iter_exit(trans, &iter2);
- bch2_trans_put(trans);
-
return ret;
}
@@ -2148,12 +2118,11 @@ static int bch2_vfs_write_inode(struct inode *vinode,
{
struct bch_fs *c = vinode->i_sb->s_fs_info;
struct bch_inode_info *inode = to_bch_ei(vinode);
- int ret;
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
- ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
- mutex_unlock(&inode->ei_update_lock);
+ guard(mutex)(&inode->ei_update_lock);
+
+ int ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
+ ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
return bch2_err_class(ret);
}
@@ -2200,9 +2169,8 @@ static void bch2_evict_inode(struct inode *vinode)
bch2_inode_hash_remove(c, inode);
}
- mutex_lock(&c->vfs_inodes_lock);
- list_del_init(&inode->ei_vfs_inode_list);
- mutex_unlock(&c->vfs_inodes_lock);
+ scoped_guard(mutex, &c->vfs_inodes_lock)
+ list_del_init(&inode->ei_vfs_inode_list);
}
void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
@@ -2352,16 +2320,14 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
static int bch2_show_options(struct seq_file *seq, struct dentry *root)
{
struct bch_fs *c = root->d_sb->s_fs_info;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE);
printbuf_nul_terminate(&buf);
seq_printf(seq, ",%s", buf.buf);
- int ret = buf.allocation_failure ? -ENOMEM : 0;
- printbuf_exit(&buf);
- return ret;
+ return buf.allocation_failure ? -ENOMEM : 0;
}
static void bch2_put_super(struct super_block *sb)
@@ -2383,24 +2349,20 @@ static int bch2_freeze(struct super_block *sb)
{
struct bch_fs *c = sb->s_fs_info;
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
bch2_fs_read_only(c);
- up_write(&c->state_lock);
return 0;
}
static int bch2_unfreeze(struct super_block *sb)
{
struct bch_fs *c = sb->s_fs_info;
- int ret;
if (test_bit(BCH_FS_emergency_ro, &c->flags))
return 0;
- down_write(&c->state_lock);
- ret = bch2_fs_read_write(c);
- up_write(&c->state_lock);
- return ret;
+ guard(rwsem_write)(&c->state_lock);
+ return bch2_fs_read_write(c);
}
static const struct super_operations bch_super_operations = {
@@ -2671,7 +2633,7 @@ static int bch2_fs_reconfigure(struct fs_context *fc)
opt_set(opts->opts, read_only, (fc->sb_flags & SB_RDONLY) != 0);
if (opts->opts.read_only != c->opts.read_only) {
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
if (opts->opts.read_only) {
bch2_fs_read_only(c);
@@ -2681,22 +2643,18 @@ static int bch2_fs_reconfigure(struct fs_context *fc)
ret = bch2_fs_read_write(c);
if (ret) {
bch_err(c, "error going rw: %i", ret);
- up_write(&c->state_lock);
- ret = -EINVAL;
- goto err;
+ return -EINVAL;
}
sb->s_flags &= ~SB_RDONLY;
}
c->opts.read_only = opts->opts.read_only;
-
- up_write(&c->state_lock);
}
if (opt_defined(opts->opts, errors))
c->opts.errors = opts->opts.errors;
-err:
+
return bch2_err_class(ret);
}
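
The fs.c conversions show that CLASS(btree_trans, ...) coexists with the existing retry: loops: the destructor only runs when the variable leaves scope, so a backwards "goto retry" within the function is fine, and the function may return from any depth. The shape of the idiom, where do_update() is a hypothetical stand-in for the real transaction body:

static int update_with_retry(struct bch_fs *c)
{
        CLASS(btree_trans, trans)(c);
        int ret;
retry:
        bch2_trans_begin(trans);

        ret = do_update(trans);         /* hypothetical helper */
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;

        return ret;     /* trans released by the class destructor */
}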
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 471e93a3f00c..df0aa2522b18 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -53,10 +53,9 @@ static int dirent_points_to_inode(struct bch_fs *c,
{
int ret = dirent_points_to_inode_nowarn(c, dirent, inode);
if (ret) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
dirent_inode_mismatch_msg(&buf, c, dirent, inode);
bch_warn(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
return ret;
}
@@ -253,14 +252,13 @@ create_lostfound:
* XXX: we could have a nicer log message here if we had a nice way to
* walk backpointers to print a path
*/
- struct printbuf path = PRINTBUF;
+ CLASS(printbuf, path)();
ret = bch2_inum_to_path(trans, root_inum, &path);
if (ret)
goto err;
bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u",
path.buf, root_inum.subvol, snapshot);
- printbuf_exit(&path);
u64 now = bch2_current_time(c);
u64 cpu = raw_smp_processor_id();
@@ -455,7 +453,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
* whiteouts for the dirent we just created.
*/
if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) {
- snapshot_id_list whiteouts_done;
+ CLASS(snapshot_id_list, whiteouts_done)();
struct btree_iter iter;
struct bkey_s_c k;
@@ -499,7 +497,6 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
break;
}
}
- darray_exit(&whiteouts_done);
bch2_trans_iter_exit(trans, &iter);
}
@@ -683,11 +680,15 @@ static inline void snapshots_seen_exit(struct snapshots_seen *s)
darray_exit(&s->ids);
}
-static inline void snapshots_seen_init(struct snapshots_seen *s)
+static inline struct snapshots_seen snapshots_seen_init(void)
{
- memset(s, 0, sizeof(*s));
+ return (struct snapshots_seen) {};
}
+DEFINE_CLASS(snapshots_seen, struct snapshots_seen,
+ snapshots_seen_exit(&_T),
+ snapshots_seen_init(), void)
+
static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
u32 *i;
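
snapshots_seen_init() is converted from memset()-on-pointer to returning a value precisely so DEFINE_CLASS() can use it as a constructor expression. Roughly (simplified; the real macros in <linux/cleanup.h> also generate a typedef and constructor wrapper), CLASS(snapshots_seen, s)() declares a local like this:

#include <linux/cleanup.h>

static inline void class_snapshots_seen_destroy(struct snapshots_seen *s)
{
        snapshots_seen_exit(s);
}

static void example(void)
{
        struct snapshots_seen s __cleanup(class_snapshots_seen_destroy) =
                snapshots_seen_init();

        /* ... use s; snapshots_seen_exit() runs when it leaves scope */
}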
@@ -815,9 +816,13 @@ static void inode_walker_exit(struct inode_walker *w)
static struct inode_walker inode_walker_init(void)
{
- return (struct inode_walker) { 0, };
+ return (struct inode_walker) {};
}
+DEFINE_CLASS(inode_walker, struct inode_walker,
+ inode_walker_exit(&_T),
+ inode_walker_init(), void)
+
static int add_inode(struct bch_fs *c, struct inode_walker *w,
struct bkey_s_c inode)
{
@@ -917,7 +922,7 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str
if (!i)
return NULL;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot,
@@ -967,10 +972,8 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str
goto fsck_err;
}
- printbuf_exit(&buf);
return i;
fsck_err:
- printbuf_exit(&buf);
return ERR_PTR(ret);
}
@@ -1004,27 +1007,25 @@ int bch2_fsck_update_backpointers(struct btree_trans *trans,
return 0;
struct bkey_i_dirent *d = bkey_i_to_dirent(new);
- struct inode_walker target = inode_walker_init();
- int ret = 0;
+ CLASS(inode_walker, target)();
if (d->v.d_type == DT_SUBVOL) {
bch_err(trans->c, "%s does not support DT_SUBVOL", __func__);
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ return bch_err_throw(trans->c, fsck_repair_unimplemented);
} else {
- ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum));
+ int ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum));
if (ret)
- goto err;
+ return ret;
darray_for_each(target.inodes, i) {
i->inode.bi_dir_offset = d->k.p.offset;
ret = __bch2_fsck_write_inode(trans, &i->inode);
if (ret)
- goto err;
+ return ret;
}
+
+ return 0;
}
-err:
- inode_walker_exit(&target);
- return ret;
}
static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
@@ -1056,7 +1057,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
bool *write_inode)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
u32 inode_snapshot = inode->bi_snapshot;
struct btree_iter dirent_iter = {};
@@ -1106,7 +1107,6 @@ out:
ret = 0;
fsck_err:
bch2_trans_iter_exit(trans, &dirent_iter);
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -1118,7 +1118,7 @@ static int check_inode(struct btree_trans *trans,
struct snapshots_seen *s)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct bch_inode_unpacked u;
bool do_update = false;
int ret;
@@ -1234,7 +1234,7 @@ static int check_inode(struct btree_trans *trans,
*/
ret = check_inode_deleted_list(trans, k.k->p);
if (ret < 0)
- goto err_noprint;
+ return ret;
fsck_err_on(!ret,
trans, unlinked_inode_not_on_deleted_list,
@@ -1255,7 +1255,7 @@ static int check_inode(struct btree_trans *trans,
u.bi_inum, u.bi_snapshot)) {
ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck deleting inode");
- goto err_noprint;
+ return ret;
}
ret = 0;
}
@@ -1316,33 +1316,26 @@ do_update:
ret = __bch2_fsck_write_inode(trans, &u);
bch_err_msg(c, ret, "in fsck updating inode");
if (ret)
- goto err_noprint;
+ return ret;
}
err:
fsck_err:
bch_err_fn(c, ret);
-err_noprint:
- printbuf_exit(&buf);
return ret;
}
int bch2_check_inodes(struct bch_fs *c)
{
struct bch_inode_unpacked snapshot_root = {};
- struct snapshots_seen s;
- snapshots_seen_init(&s);
+ CLASS(btree_trans, trans)(c);
+ CLASS(snapshots_seen, s)();
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
+ return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_inode(trans, &iter, k, &snapshot_root, &s)));
-
- snapshots_seen_exit(&s);
- bch_err_fn(c, ret);
- return ret;
+ check_inode(trans, &iter, k, &snapshot_root, &s));
}
static int find_oldest_inode_needs_reattach(struct btree_trans *trans,
@@ -1390,7 +1383,7 @@ static int check_unreachable_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
if (!bkey_is_inode(k.k))
@@ -1414,7 +1407,6 @@ static int check_unreachable_inode(struct btree_trans *trans,
buf.buf)))
ret = reattach_inode(trans, &inode);
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -1430,14 +1422,12 @@ fsck_err:
*/
int bch2_check_unreachable_inodes(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_unreachable_inode(trans, &iter, k)));
- bch_err_fn(c, ret);
- return ret;
+ check_unreachable_inode(trans, &iter, k));
}
static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode)
@@ -1461,7 +1451,7 @@ static int check_key_has_inode(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct btree_iter iter2 = {};
int ret = PTR_ERR_OR_ZERO(i);
if (ret)
@@ -1557,7 +1547,6 @@ out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &iter2);
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
delete:
@@ -1627,23 +1616,28 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
if (i->inode.bi_sectors == i->count)
continue;
+ CLASS(printbuf, buf)();
+ lockrestart_do(trans,
+ bch2_inum_snapshot_to_path(trans,
+ i->inode.bi_inum,
+ i->inode.bi_snapshot, NULL, &buf));
+
count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot);
if (w->recalculate_sums)
i->count = count2;
if (i->count != count2) {
- bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
- w->last_pos.inode, i->inode.bi_snapshot, i->count, count2);
+ bch_err_ratelimited(c, "fsck counted i_sectors wrong: got %llu should be %llu\n%s",
+ i->count, count2, buf.buf);
i->count = count2;
}
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) &&
i->inode.bi_sectors != i->count,
trans, inode_i_sectors_wrong,
- "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
- w->last_pos.inode, i->inode.bi_snapshot,
- i->inode.bi_sectors, i->count)) {
+ "incorrect i_sectors: got %llu, should be %llu\n%s",
+ i->inode.bi_sectors, i->count, buf.buf)) {
i->inode.bi_sectors = i->count;
ret = bch2_fsck_write_inode(trans, &i->inode);
if (ret)
@@ -1686,11 +1680,15 @@ static void extent_ends_exit(struct extent_ends *extent_ends)
darray_exit(&extent_ends->e);
}
-static void extent_ends_init(struct extent_ends *extent_ends)
+static struct extent_ends extent_ends_init(void)
{
- memset(extent_ends, 0, sizeof(*extent_ends));
+ return (struct extent_ends) {};
}
+DEFINE_CLASS(extent_ends, struct extent_ends,
+ extent_ends_exit(&_T),
+ extent_ends_init(), void)
+
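For reference, DEFINE_CLASS(name, type, exit, init, init_args...) from linux/cleanup.h generates the constructor that CLASS(name, var)(args) calls and the destructor that runs when var leaves scope; _T names the object inside the exit expression. A self-contained sketch with a hypothetical scratch-buffer class:

    #include <linux/cleanup.h>
    #include <linux/slab.h>

    /* hypothetical class: zeroed buffer, freed automatically */
    DEFINE_CLASS(scratch, void *, kfree(_T), kzalloc(size, GFP_KERNEL), size_t size)

    static int use_scratch(void)
    {
            CLASS(scratch, p)(256);         /* constructor runs here */
            if (!p)
                    return -ENOMEM;
            return 0;                       /* kfree(p) runs at scope exit */
    }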
static int extent_ends_at(struct bch_fs *c,
struct extent_ends *extent_ends,
struct snapshots_seen *seen,
@@ -1730,7 +1728,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
struct extent_end *extent_end)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct btree_iter iter1, iter2 = {};
struct bkey_s_c k1, k2;
int ret;
@@ -1836,7 +1834,6 @@ fsck_err:
err:
bch2_trans_iter_exit(trans, &iter2);
bch2_trans_iter_exit(trans, &iter1);
- printbuf_exit(&buf);
return ret;
}
@@ -1893,11 +1890,10 @@ static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *it
bkey_for_each_crc(k.k, ptrs, crc, i)
if (crc_is_encoded(crc) &&
crc.uncompressed_size > encoded_extent_max_sectors) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
- printbuf_exit(&buf);
}
return 0;
@@ -1911,7 +1907,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
struct disk_reservation *res)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
ret = bch2_check_key_has_snapshot(trans, iter, k);
@@ -2004,7 +2000,6 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
out:
err:
fsck_err:
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -2015,49 +2010,41 @@ fsck_err:
*/
int bch2_check_extents(struct bch_fs *c)
{
- struct inode_walker w = inode_walker_init();
- struct snapshots_seen s;
- struct extent_ends extent_ends;
struct disk_reservation res = { 0 };
- snapshots_seen_init(&s);
- extent_ends_init(&extent_ends);
+ CLASS(btree_trans, trans)(c);
+ CLASS(snapshots_seen, s)();
+ CLASS(inode_walker, w)();
+ CLASS(extent_ends, extent_ends)();
- int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_extents,
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_extents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
bch2_disk_reservation_put(c, &res);
check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?:
check_extent_overbig(trans, &iter, k);
})) ?:
- check_i_sectors_notnested(trans, &w));
+ check_i_sectors_notnested(trans, &w);
bch2_disk_reservation_put(c, &res);
- extent_ends_exit(&extent_ends);
- inode_walker_exit(&w);
- snapshots_seen_exit(&s);
-
- bch_err_fn(c, ret);
return ret;
}
int bch2_check_indirect_extents(struct bch_fs *c)
{
+ CLASS(btree_trans, trans)(c);
struct disk_reservation res = { 0 };
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
POS_MIN,
BTREE_ITER_prefetch, k,
&res, NULL,
BCH_TRANS_COMMIT_no_enospc, ({
bch2_disk_reservation_put(c, &res);
check_extent_overbig(trans, &iter, k);
- })));
+ }));
bch2_disk_reservation_put(c, &res);
- bch_err_fn(c, ret);
return ret;
}
@@ -2150,7 +2137,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
u32 parent_snapshot;
u32 new_parent_subvol = 0;
u64 parent_inum;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
@@ -2274,7 +2261,6 @@ out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &subvol_iter);
- printbuf_exit(&buf);
return ret;
}
@@ -2288,39 +2274,37 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
{
struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
ret = bch2_check_key_has_snapshot(trans, iter, k);
- if (ret) {
- ret = ret < 0 ? ret : 0;
- goto out;
- }
+ if (ret)
+ return ret < 0 ? ret : 0;
ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
if (ret)
- goto err;
+ return ret;
if (k.k->type == KEY_TYPE_whiteout)
- goto out;
+ return 0;
if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
ret = check_subdir_dirents_count(trans, dir);
if (ret)
- goto err;
+ return ret;
}
i = walk_inode(trans, dir, k);
ret = PTR_ERR_OR_ZERO(i);
- if (ret < 0)
- goto err;
+ if (ret)
+ return ret;
ret = check_key_has_inode(trans, iter, dir, i, k);
if (ret)
- goto err;
+ return ret;
if (!i || i->whiteout)
- goto out;
+ return 0;
if (dir->first_this_inode)
*hash_info = bch2_hash_info_init(c, &i->inode);
@@ -2331,15 +2315,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info,
iter, k, need_second_pass);
if (ret < 0)
- goto err;
- if (ret) {
- /* dirent has been deleted */
- ret = 0;
- goto out;
- }
-
+ return ret;
+ if (ret)
+ return 0; /* dirent has been deleted */
if (k.k->type != KEY_TYPE_dirent)
- goto out;
+ return 0;
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
@@ -2364,13 +2344,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
d.v->d_type, &name, NULL, target);
ret = PTR_ERR_OR_ZERO(new_d);
if (ret)
- goto out;
+ return ret;
new_d->k.p.inode = d.k->p.inode;
new_d->k.p.snapshot = d.k->p.snapshot;
struct btree_iter dup_iter = {};
- ret = bch2_hash_delete_at(trans,
+ return bch2_hash_delete_at(trans,
bch2_dirent_hash_desc, hash_info, iter,
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_str_hash_repair_key(trans, s,
@@ -2378,17 +2358,16 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
iter, bkey_i_to_s_c(&new_d->k_i),
&dup_iter, bkey_s_c_null,
need_second_pass);
- goto out;
}
if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d);
if (ret)
- goto err;
+ return ret;
} else {
ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
if (ret)
- goto err;
+ return ret;
if (!target->inodes.nr) {
ret = maybe_reconstruct_inum(trans, le64_to_cpu(d.v->d_inum),
@@ -2405,13 +2384,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
buf.buf))) {
ret = bch2_fsck_remove_dirent(trans, d.k->p);
if (ret)
- goto err;
+ return ret;
}
darray_for_each(target->inodes, i) {
ret = bch2_check_dirent_target(trans, iter, d, &i->inode, true);
if (ret)
- goto err;
+ return ret;
}
darray_for_each(target->deletes, i)
@@ -2434,24 +2413,27 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &delete_iter);
if (ret)
- goto err;
+ return ret;
}
}
+ /*
+ * Cannot access key values after doing a transaction commit without
+ * revalidating:
+ */
+ bool have_dir = d.v->d_type == DT_DIR;
+
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
- goto err;
+ return ret;
for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) {
- if (d.v->d_type == DT_DIR)
+ if (have_dir)
i->count++;
i->i_size += bkey_bytes(d.k);
}
-out:
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
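The have_dir temporary encodes the rule stated in the new comment: bch2_trans_commit() can invalidate pointers into btree keys, so any field of d.v needed after the commit must be copied to a local first. Distilled from the hunk above:

    bool have_dir = d.v->d_type == DT_DIR;  /* copy out before committing */

    ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
    if (ret)
            return ret;

    if (have_dir)                           /* d.v may be stale past this point */
            i->count++;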
@@ -2461,23 +2443,21 @@ fsck_err:
*/
int bch2_check_dirents(struct bch_fs *c)
{
- struct inode_walker dir = inode_walker_init();
- struct inode_walker target = inode_walker_init();
- struct snapshots_seen s;
struct bch_hash_info hash_info;
+ CLASS(btree_trans, trans)(c);
+ CLASS(snapshots_seen, s)();
+ CLASS(inode_walker, dir)();
+ CLASS(inode_walker, target)();
bool need_second_pass = false, did_second_pass = false;
int ret;
-
- snapshots_seen_init(&s);
again:
- ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s,
&need_second_pass)) ?:
- check_subdir_count_notnested(trans, &dir));
+ check_subdir_count_notnested(trans, &dir);
if (!ret && need_second_pass && !did_second_pass) {
bch_info(c, "check_dirents requires second pass");
@@ -2490,10 +2470,6 @@ again:
ret = -EINVAL;
}
- snapshots_seen_exit(&s);
- inode_walker_exit(&dir);
- inode_walker_exit(&target);
- bch_err_fn(c, ret);
return ret;
}
@@ -2536,21 +2512,17 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
*/
int bch2_check_xattrs(struct bch_fs *c)
{
- struct inode_walker inode = inode_walker_init();
struct bch_hash_info hash_info;
- int ret = 0;
+ CLASS(btree_trans, trans)(c);
+ CLASS(inode_walker, inode)();
- ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
k,
NULL, NULL,
BCH_TRANS_COMMIT_no_enospc,
- check_xattr(trans, &iter, k, &hash_info, &inode)));
-
- inode_walker_exit(&inode);
- bch_err_fn(c, ret);
+ check_xattr(trans, &iter, k, &hash_info, &inode));
return ret;
}
@@ -2615,18 +2587,17 @@ fsck_err:
/* Get root directory, create if it doesn't exist: */
int bch2_check_root(struct bch_fs *c)
{
- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_root_trans(trans));
- bch_err_fn(c, ret);
- return ret;
+ CLASS(btree_trans, trans)(c);
+ return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_root_trans(trans));
}
static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct btree_iter parent_iter = {};
- darray_u32 subvol_path = {};
- struct printbuf buf = PRINTBUF;
+ CLASS(darray_u32, subvol_path)();
+ CLASS(printbuf, buf)();
int ret = 0;
if (k.k->type != KEY_TYPE_subvolume)
@@ -2686,21 +2657,17 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
}
fsck_err:
err:
- printbuf_exit(&buf);
- darray_exit(&subvol_path);
bch2_trans_iter_exit(trans, &parent_iter);
return ret;
}
int bch2_check_subvolume_structure(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter,
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_subvol_path(trans, &iter, k)));
- bch_err_fn(c, ret);
- return ret;
+ check_subvol_path(trans, &iter, k));
}
static int bch2_bi_depth_renumber_one(struct btree_trans *trans,
@@ -2751,8 +2718,8 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
- darray_u64 path = {};
- struct printbuf buf = PRINTBUF;
+ CLASS(darray_u64, path)();
+ CLASS(printbuf, buf)();
u32 snapshot = inode_k.k->p.snapshot;
bool redo_bi_depth = false;
u32 min_bi_depth = U32_MAX;
@@ -2858,8 +2825,6 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
- darray_exit(&path);
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -2870,8 +2835,8 @@ fsck_err:
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,
@@ -2883,10 +2848,7 @@ int bch2_check_directory_structure(struct bch_fs *c)
continue;
check_path_loop(trans, k);
- })));
-
- bch_err_fn(c, ret);
- return ret;
+ }));
}
struct nlink_table {
@@ -2970,8 +2932,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
struct nlink_table *t,
u64 start, u64 *end)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_inodes,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_inodes,
POS(0, start),
BTREE_ITER_intent|
BTREE_ITER_prefetch|
@@ -3006,7 +2968,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
break;
}
0;
- })));
+ }));
bch_err_fn(c, ret);
return ret;
@@ -3016,12 +2978,10 @@ noinline_for_stack
static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
u64 range_start, u64 range_end)
{
- struct snapshots_seen s;
-
- snapshots_seen_init(&s);
+ CLASS(btree_trans, trans)(c);
+ CLASS(snapshots_seen, s)();
- int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k, ({
@@ -3038,9 +2998,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
le64_to_cpu(d.v->d_inum), d.k->p.snapshot);
}
0;
- })));
-
- snapshots_seen_exit(&s);
+ }));
bch_err_fn(c, ret);
return ret;
@@ -3094,14 +3052,14 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
struct nlink_table *links,
u64 range_start, u64 range_end)
{
+ CLASS(btree_trans, trans)(c);
size_t idx = 0;
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
POS(0, range_start),
BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)));
+ check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end));
if (ret < 0) {
bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret));
return ret;
@@ -3140,7 +3098,6 @@ int bch2_check_nlinks(struct bch_fs *c)
} while (next_iter_range_start != U64_MAX);
kvfree(links.d);
- bch_err_fn(c, ret);
return ret;
}
@@ -3175,15 +3132,13 @@ int bch2_fix_reflink_p(struct bch_fs *c)
if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
return 0;
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter,
BTREE_ID_extents, POS_MIN,
BTREE_ITER_intent|BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- fix_reflink_p_key(trans, &iter, k)));
- bch_err_fn(c, ret);
- return ret;
+ fix_reflink_p_key(trans, &iter, k));
}
#ifndef NO_BCACHEFS_CHARDEV
@@ -3209,6 +3164,8 @@ static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
if (ret)
return ret;
+ thr->c->recovery_task = current;
+
ret = bch2_fs_start(thr->c);
if (ret)
goto err;
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 307fb0c95656..4a9725f30c4f 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -417,7 +417,8 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
struct bch_inode_unpacked *inode)
{
- return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode));
+ CLASS(btree_trans, trans)(c);
+ return lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, inode));
}
int bch2_inode_find_snapshot_root(struct btree_trans *trans, u64 inum,
@@ -1132,7 +1133,7 @@ err:
int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter = {};
struct bkey_s_c k;
struct bch_inode_unpacked inode;
@@ -1141,7 +1142,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum, &inode));
if (ret)
- goto err2;
+ return ret;
/*
* If this was a directory, there shouldn't be any real dirents left -
@@ -1156,7 +1157,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
: bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents)) ?:
bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs);
if (ret)
- goto err2;
+ return ret;
retry:
bch2_trans_begin(trans);
@@ -1188,12 +1189,9 @@ err:
goto retry;
if (ret)
- goto err2;
+ return ret;
- ret = delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot));
-err2:
- bch2_trans_put(trans);
- return ret;
+ return delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot));
}
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
@@ -1413,7 +1411,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
struct bch_fs *c = trans->c;
struct btree_iter inode_iter;
struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret;
k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_cached);
@@ -1506,7 +1504,6 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
- printbuf_exit(&buf);
return ret;
delete:
ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
@@ -1524,29 +1521,23 @@ static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum,
int bch2_delete_dead_inodes(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
- int ret;
-
+ CLASS(btree_trans, trans)(c);
/*
 * if we ran check_inodes(), unlinked inodes will have already been
 * cleaned up, but the write buffer will be out of sync; therefore we
 * always need a write buffer flush
- */
- ret = bch2_btree_write_buffer_flush_sync(trans);
- if (ret)
- goto err;
-
- /*
+ *
* Weird transaction restart handling here because on successful delete,
* bch2_inode_rm_snapshot() will return a nested transaction restart,
* but we can't retry because the btree write buffer won't have been
* flushed and we'd spin:
*/
- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+ return bch2_btree_write_buffer_flush_sync(trans) ?:
+ for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
struct bch_inode_unpacked inode;
- ret = may_delete_deleted_inode(trans, k.k->p, &inode, true);
+ int ret = may_delete_deleted_inode(trans, k.k->p, &inode, true);
if (ret > 0) {
bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u",
k.k->p.offset, k.k->p.snapshot);
@@ -1567,8 +1558,4 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
ret;
}));
-err:
- bch2_trans_put(trans);
- bch_err_fn(c, ret);
- return ret;
}
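The rewritten bch2_delete_dead_inodes() chains its two steps with the GNU C a ?: b extension used throughout bcachefs for error propagation: evaluate a, return it if nonzero, otherwise evaluate b. A sketch with hypothetical helpers:

    /* hypothetical steps, each returning 0 or a negative error */
    int ret = flush_step(trans) ?:
              scan_step(trans) ?:
              cleanup_step(trans);  /* first failure short-circuits */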
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 07023667a475..5d6681c070ba 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -114,12 +114,11 @@ err:
if (!ret && sectors_allocated)
bch2_increment_clock(c, sectors_allocated, WRITE);
if (should_print_err(ret)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9));
prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
err_noprint:
bch2_open_buckets_put(c, &open_buckets);
@@ -222,23 +221,18 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
s64 *i_sectors_delta)
{
- struct btree_trans *trans = bch2_trans_get(c);
- struct btree_iter iter;
- int ret;
+ CLASS(btree_trans, trans)(c);
+ struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
POS(inum.inum, start),
BTREE_ITER_intent);
- ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);
+ int ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
-
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- ret = 0;
- return ret;
+ return bch2_err_matches(ret, BCH_ERR_transaction_restart) ? 0 : ret;
}
/* truncate: */
@@ -319,17 +313,13 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec
* snapshot while they're in progress, then crashing, will result in the
* resume only proceeding in one of the snapshots
*/
- down_read(&c->snapshot_create_lock);
- struct btree_trans *trans = bch2_trans_get(c);
+ guard(rwsem_read)(&c->snapshot_create_lock);
+ CLASS(btree_trans, trans)(c);
int ret = bch2_logged_op_start(trans, &op.k_i);
if (ret)
- goto out;
+ return ret;
ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta);
ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret;
-out:
- bch2_trans_put(trans);
- up_read(&c->snapshot_create_lock);
-
return ret;
}
@@ -555,16 +545,12 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
* snapshot while they're in progress, then crashing, will result in the
* resume only proceeding in one of the snapshots
*/
- down_read(&c->snapshot_create_lock);
- struct btree_trans *trans = bch2_trans_get(c);
+ guard(rwsem_read)(&c->snapshot_create_lock);
+ CLASS(btree_trans, trans)(c);
int ret = bch2_logged_op_start(trans, &op.k_i);
if (ret)
- goto out;
+ return ret;
ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta);
ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret;
-out:
- bch2_trans_put(trans);
- up_read(&c->snapshot_create_lock);
-
return ret;
}
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 210b6adc359f..b8ccd8c930e1 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -39,38 +39,73 @@ MODULE_PARM_DESC(read_corrupt_ratio, "");
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+static inline u32 bch2_dev_congested_read(struct bch_dev *ca, u64 now)
+{
+ s64 congested = atomic_read(&ca->congested);
+ u64 last = READ_ONCE(ca->congested_last);
+ if (time_after64(now, last))
+ congested -= (now - last) >> 12;
+
+ return clamp(congested, 0LL, CONGESTED_MAX);
+}
+
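bch2_dev_congested_read() decays the counter by (now - last) >> 12; since local_clock() counts nanoseconds, that is roughly one unit drained per 4 µs of quiet. A worked example under that reading:

    s64 congested = 800;
    u64 idle_ns   = 1000 * 1000;    /* 1 ms since ca->congested_last */
    congested -= idle_ns >> 12;     /* 1000000 >> 12 = 244 */
    /* reads back as 556; a few quiet ms drain even CONGESTED_MAX */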
static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
const struct bch_devs_mask *devs;
unsigned d, nr = 0, total = 0;
- u64 now = local_clock(), last;
- s64 congested;
- struct bch_dev *ca;
-
- if (!target)
- return false;
+ u64 now = local_clock();
guard(rcu)();
devs = bch2_target_to_mask(c, target) ?:
&c->rw_devs[BCH_DATA_user];
for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
- ca = rcu_dereference(c->devs[d]);
+ struct bch_dev *ca = rcu_dereference(c->devs[d]);
if (!ca)
continue;
- congested = atomic_read(&ca->congested);
- last = READ_ONCE(ca->congested_last);
- if (time_after64(now, last))
- congested -= (now - last) >> 12;
-
- total += max(congested, 0LL);
+ total += bch2_dev_congested_read(ca, now);
nr++;
}
return get_random_u32_below(nr * CONGESTED_MAX) < total;
}
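The return expression is a Bernoulli trial: get_random_u32_below(nr * CONGESTED_MAX) is uniform in [0, nr * CONGESTED_MAX), so the target reads as congested with probability total / (nr * CONGESTED_MAX), i.e. the mean congestion fraction across its devices. For example:

    /* two devices, one at CONGESTED_MAX / 2 and one idle:
     *   total = CONGESTED_MAX / 2, nr = 2
     *   P(congested) = (CONGESTED_MAX / 2) / (2 * CONGESTED_MAX) = 25%
     */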
+void bch2_dev_congested_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+ printbuf_tabstop_push(out, 32);
+
+ prt_printf(out, "current:\t%u%%\n",
+ bch2_dev_congested_read(ca, local_clock()) *
+ 100 / CONGESTED_MAX);
+
+ prt_printf(out, "raw:\t%i/%u\n", atomic_read(&ca->congested), CONGESTED_MAX);
+
+ prt_printf(out, "last io over threshold:\t");
+ bch2_pr_time_units(out, local_clock() - ca->congested_last);
+ prt_newline(out);
+
+ prt_printf(out, "read latency threshold:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(1)].m << 2);
+ prt_newline(out);
+
+ prt_printf(out, "median read latency:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(7)].m);
+ prt_newline(out);
+
+ prt_printf(out, "write latency threshold:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(1)].m << 3);
+ prt_newline(out);
+
+ prt_printf(out, "median write latency:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(7)].m);
+ prt_newline(out);
+}
+
#else
static bool bch2_target_congested(struct bch_fs *c, u16 target)
@@ -130,22 +165,32 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
if (!have_io_error(failed)) {
BUG_ON(!opts.promote_target);
- if (!(flags & BCH_READ_may_promote))
+ if (!(flags & BCH_READ_may_promote)) {
+ count_event(c, io_read_nopromote_may_not);
return bch_err_throw(c, nopromote_may_not);
+ }
- if (bch2_bkey_has_target(c, k, opts.promote_target))
+ if (bch2_bkey_has_target(c, k, opts.promote_target)) {
+ count_event(c, io_read_nopromote_already_promoted);
return bch_err_throw(c, nopromote_already_promoted);
+ }
- if (bkey_extent_is_unwritten(k))
+ if (bkey_extent_is_unwritten(k)) {
+ count_event(c, io_read_nopromote_unwritten);
return bch_err_throw(c, nopromote_unwritten);
+ }
- if (bch2_target_congested(c, opts.promote_target))
+ if (bch2_target_congested(c, opts.promote_target)) {
+ count_event(c, io_read_nopromote_congested);
return bch_err_throw(c, nopromote_congested);
+ }
}
if (rhashtable_lookup_fast(&c->promote_table, &pos,
- bch_promote_params))
+ bch_promote_params)) {
+ count_event(c, io_read_nopromote_in_flight);
return bch_err_throw(c, nopromote_in_flight);
+ }
return 0;
}
@@ -160,6 +205,7 @@ static noinline void promote_free(struct bch_read_bio *rbio)
BUG_ON(ret);
async_object_list_del(c, promote, op->list_idx);
+ async_object_list_del(c, rbio, rbio->list_idx);
bch2_data_update_exit(&op->write);
@@ -343,16 +389,27 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
return promote;
nopromote:
- trace_io_read_nopromote(c, ret);
+ if (trace_io_read_nopromote_enabled()) {
+ CLASS(printbuf, buf)();
+ printbuf_indent_add_nextline(&buf, 2);
+ prt_printf(&buf, "%s\n", bch2_err_str(ret));
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ trace_io_read_nopromote(c, buf.buf);
+ }
+ count_event(c, io_read_nopromote);
+
return NULL;
}
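The trace_io_read_nopromote_enabled() guard is the standard tracepoint idiom: TRACE_EVENT() generates an _enabled() predicate backed by a static branch, so the printbuf formatting is only paid for when the tracepoint is actually attached. The shape, distilled from the hunk above:

    if (trace_io_read_nopromote_enabled()) {        /* static branch, ~free */
            CLASS(printbuf, buf)();
            prt_printf(&buf, "%s\n", bch2_err_str(ret));
            bch2_bkey_val_to_text(&buf, c, k);
            trace_io_read_nopromote(c, buf.buf);    /* formats only when live */
    }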
-void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op)
+void bch2_promote_op_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct promote_op *op)
{
if (!op->write.read_done) {
prt_printf(out, "parent read: %px\n", op->write.rbio.parent);
printbuf_indent_add(out, 2);
- bch2_read_bio_to_text(out, op->write.rbio.parent);
+ bch2_read_bio_to_text(out, c, op->write.rbio.parent);
printbuf_indent_sub(out, 2);
}
@@ -380,7 +437,8 @@ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *o
static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
struct bch_read_bio *rbio, struct bpos read_pos)
{
- bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos));
+ CLASS(btree_trans, trans)(c);
+ bch2_read_err_msg_trans(trans, out, rbio, read_pos);
}
enum rbio_context {
@@ -450,6 +508,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
if (rbio->start_time)
bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
rbio->start_time);
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ if (rbio->list_idx)
+ async_object_list_del(rbio->c, rbio, rbio->list_idx);
+#endif
bio_endio(&rbio->bio);
}
@@ -577,7 +639,7 @@ static void bch2_rbio_retry(struct work_struct *work)
};
struct bch_io_failures failed = { .nr = 0 };
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bkey_buf sk;
bch2_bkey_buf_init(&sk);
@@ -619,7 +681,7 @@ static void bch2_rbio_retry(struct work_struct *work)
}
if (failed.nr || ret) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
lockrestart_do(trans,
@@ -647,12 +709,10 @@ static void bch2_rbio_retry(struct work_struct *work)
bch2_io_failures_to_text(&buf, c, &failed);
bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
bch2_rbio_done(rbio);
bch2_bkey_buf_exit(&sk, c);
- bch2_trans_put(trans);
}
static void bch2_rbio_error(struct bch_read_bio *rbio,
@@ -740,8 +800,9 @@ out:
static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
- bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- __bch2_rbio_narrow_crcs(trans, rbio));
+ CLASS(btree_trans, trans)(rbio->c);
+ commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_rbio_narrow_crcs(trans, rbio));
}
static void bch2_read_decompress_err(struct work_struct *work)
@@ -749,7 +810,7 @@ static void bch2_read_decompress_err(struct work_struct *work)
struct bch_read_bio *rbio =
container_of(work, struct bch_read_bio, work);
struct bch_fs *c = rbio->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
prt_str(&buf, "decompression error");
@@ -761,7 +822,6 @@ static void bch2_read_decompress_err(struct work_struct *work)
bch_err_ratelimited(c, "%s", buf.buf);
bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR);
- printbuf_exit(&buf);
}
static void bch2_read_decrypt_err(struct work_struct *work)
@@ -769,7 +829,7 @@ static void bch2_read_decrypt_err(struct work_struct *work)
struct bch_read_bio *rbio =
container_of(work, struct bch_read_bio, work);
struct bch_fs *c = rbio->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
prt_str(&buf, "decrypt error");
@@ -781,7 +841,6 @@ static void bch2_read_decrypt_err(struct work_struct *work)
bch_err_ratelimited(c, "%s", buf.buf);
bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR);
- printbuf_exit(&buf);
}
/* Inner part that may run in process context */
@@ -963,7 +1022,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
@@ -1000,7 +1059,6 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
bch2_fs_inconsistent(c, "%s", buf.buf);
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
}
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
@@ -1052,25 +1110,22 @@ retry_pick:
trace_and_count(c, io_read_fail_and_poison, &orig->bio);
}
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "%s\n ", bch2_err_str(ret));
bch2_bkey_val_to_text(&buf, c, k);
-
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
goto err;
}
if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) &&
!c->chacha20_key_set) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "attempting to read encrypted data without encryption key\n ");
bch2_bkey_val_to_text(&buf, c, k);
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
ret = bch_err_throw(c, data_read_no_encryption_key);
goto err;
}
@@ -1439,13 +1494,12 @@ err:
if (unlikely(ret)) {
if (ret != -BCH_ERR_extent_poisoned) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
bvec_iter.bi_sector << 9));
prt_printf(&buf, "data read error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
rbio->bio.bi_status = BLK_STS_IOERR;
@@ -1467,19 +1521,34 @@ static const char * const bch2_read_bio_flags[] = {
NULL
};
-void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
+void bch2_read_bio_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_read_bio *rbio)
{
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 20);
+
+ bch2_read_err_msg(c, out, rbio, rbio->read_pos);
+ prt_newline(out);
+
+ /* Are we in a retry? */
+
+ printbuf_indent_add(out, 2);
+
u64 now = local_clock();
- prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0);
- prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0);
+ prt_printf(out, "start_time:\t");
+ bch2_pr_time_units(out, max_t(s64, 0, now - rbio->start_time));
+ prt_newline(out);
+
+ prt_printf(out, "submit_time:\t");
+ bch2_pr_time_units(out, max_t(s64, 0, now - rbio->submit_time));
+ prt_newline(out);
if (!rbio->split)
prt_printf(out, "end_io:\t%ps\n", rbio->end_io);
else
prt_printf(out, "parent:\t%px\n", rbio->parent);
- prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io);
-
prt_printf(out, "promote:\t%u\n", rbio->promote);
prt_printf(out, "bounce:\t%u\n", rbio->bounce);
prt_printf(out, "split:\t%u\n", rbio->split);
@@ -1498,6 +1567,7 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
prt_newline(out);
bch2_bio_to_text(out, &rbio->bio);
+ printbuf_indent_sub(out, 2);
}
void bch2_fs_io_read_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index cfc8ef35b14d..9d63d5914b20 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -7,6 +7,10 @@
#include "extents_types.h"
#include "reflink.h"
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *);
+#endif
+
struct bch_read_bio {
struct bch_fs *c;
u64 start_time;
@@ -161,11 +165,11 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
rbio->subvol = inum.subvol;
- bch2_trans_run(c,
- __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL,
- BCH_READ_retry_if_stale|
- BCH_READ_may_promote|
- BCH_READ_user_mapped));
+ CLASS(btree_trans, trans)(c);
+ __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL,
+ BCH_READ_retry_if_stale|
+ BCH_READ_may_promote|
+ BCH_READ_user_mapped);
}
static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
@@ -207,8 +211,8 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio,
}
struct promote_op;
-void bch2_promote_op_to_text(struct printbuf *, struct promote_op *);
-void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *);
+void bch2_promote_op_to_text(struct printbuf *, struct bch_fs *, struct promote_op *);
+void bch2_read_bio_to_text(struct printbuf *, struct bch_fs *, struct bch_read_bio *);
void bch2_fs_io_read_exit(struct bch_fs *);
int bch2_fs_io_read_init(struct bch_fs *);
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 88b1eec8eff3..d7620138e038 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -32,6 +32,7 @@
#include "trace.h"
#include <linux/blkdev.h>
+#include <linux/moduleparam.h>
#include <linux/prefetch.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
@@ -54,14 +55,9 @@ static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency,
s64 latency_over = io_latency - latency_threshold;
if (latency_threshold && latency_over > 0) {
- /*
- * bump up congested by approximately latency_over * 4 /
- * latency_threshold - we don't need much accuracy here so don't
- * bother with the divide:
- */
if (atomic_read(&ca->congested) < CONGESTED_MAX)
- atomic_add(latency_over >>
- max_t(int, ilog2(latency_threshold) - 2, 0),
+ atomic_add((u32) min(U32_MAX, io_latency * 2) /
+ (u32) min(U32_MAX, latency_threshold),
&ca->congested);
ca->congested_last = now;
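The replacement drops the old shift-based approximation for an explicit ratio: each over-threshold IO bumps the counter by io_latency * 2 / latency_threshold, with both operands clamped to u32 before the divide (latency_threshold is known nonzero on this path). Worked examples under that reading:

    /* io_latency just over the threshold t:  add = (t * 2) / t = 2
     * io_latency = 3t:                       add = (6t) / t    = 6
     * so the counter grows in proportion to how badly an IO missed */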
@@ -260,7 +256,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors);
if (unlikely(bi_sectors + i_sectors_delta < 0)) {
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0",
extent_iter->pos.inode, bi_sectors, i_sectors_delta);
@@ -268,7 +264,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
if (i_sectors_delta < 0)
i_sectors_delta = -bi_sectors;
@@ -374,7 +369,6 @@ static int bch2_write_index_default(struct bch_write_op *op)
struct bkey_buf sk;
struct keylist *keys = &op->insert_keys;
struct bkey_i *k = bch2_keylist_front(keys);
- struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
subvol_inum inum = {
.subvol = op->subvol,
@@ -384,6 +378,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
BUG_ON(!inum.subvol);
+ CLASS(btree_trans, trans)(c);
bch2_bkey_buf_init(&sk);
do {
@@ -420,7 +415,6 @@ static int bch2_write_index_default(struct bch_write_op *op)
bch2_cut_front(iter.pos, k);
} while (!bch2_keylist_empty(keys));
- bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;
@@ -430,7 +424,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
if (op->subvol) {
bch2_inum_offset_err_msg(op->c, &buf,
@@ -457,7 +451,6 @@ void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, .
}
bch_err_ratelimited(op->c, "%s", buf.buf);
- printbuf_exit(&buf);
}
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@@ -469,8 +462,8 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
struct bch_write_bio *n;
unsigned ref_rw = type == BCH_DATA_btree ? READ : WRITE;
unsigned ref_idx = type == BCH_DATA_btree
- ? BCH_DEV_READ_REF_btree_node_write
- : BCH_DEV_WRITE_REF_io_write;
+ ? (unsigned) BCH_DEV_READ_REF_btree_node_write
+ : (unsigned) BCH_DEV_WRITE_REF_io_write;
BUG_ON(c->opts.nochanges);
@@ -1222,6 +1215,7 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op,
static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bch_write_op *op,
struct bkey_i *orig,
struct bkey_s_c k,
u64 new_i_size)
@@ -1231,11 +1225,13 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
return 0;
}
- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
+ struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans,
+ bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance));
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
+ bkey_reassemble(new, k);
bch2_cut_front(bkey_start_pos(&orig->k), new);
bch2_cut_back(orig->k.p, new);
@@ -1243,6 +1239,8 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
bkey_for_each_ptr(ptrs, ptr)
ptr->unwritten = 0;
+ bch2_bkey_set_needs_rebalance(op->c, &op->opts, new);
+
/*
* Note that we're not calling bch2_subvol_get_snapshot() in this path -
* that was done when we kicked off the write, and here it's important
@@ -1267,7 +1265,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
bkey_start_pos(&orig->k), orig->k.p,
BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
- bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size);
+ bch2_nocow_write_convert_one_unwritten(trans, &iter, op, orig, k, op->new_i_size);
}));
if (ret)
break;
@@ -1476,7 +1474,7 @@ err_bucket_stale:
break;
}
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
if (bch2_fs_inconsistent_on(stale < 0, c,
"pointer to invalid bucket in nocow path on device %llu\n %s",
stale_at->b.inode,
@@ -1486,7 +1484,6 @@ err_bucket_stale:
/* We can retry this: */
ret = bch_err_throw(c, transaction_restart);
}
- printbuf_exit(&buf);
goto err_get_ioref;
}
@@ -1530,7 +1527,7 @@ again:
* freeing up space on specific disks, which means that
* allocations for specific disks may hang arbitrarily long:
*/
- ret = bch2_trans_run(c, lockrestart_do(trans,
+ ret = bch2_trans_do(c,
bch2_alloc_sectors_start_trans(trans,
op->target,
op->opts.erasure_code && !(op->flags & BCH_WRITE_cached),
@@ -1540,7 +1537,7 @@ again:
op->nr_replicas_required,
op->watermark,
op->flags,
- &op->cl, &wp)));
+ &op->cl, &wp));
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
break;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index f22b05e02c1e..de03e20f6e30 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -88,7 +88,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
{
lockdep_assert_held(&j->lock);
- out->atomic++;
+ guard(printbuf_atomic)(out);
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 24);
@@ -98,8 +98,6 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
seq++)
bch2_journal_buf_to_text(out, j, seq);
prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
-
- --out->atomic;
}
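guard(printbuf_atomic)(out) replaces the manual out->atomic++ / --out->atomic pair; while the counter is nonzero the printbuf presumably allocates with GFP_NOWAIT (truncating rather than sleeping), which is what makes it safe under j->lock. Sketch:

    guard(printbuf_atomic)(out);    /* out->atomic++ now, -- at scope exit */
    prt_printf(out, "...");         /* nonblocking allocation while atomic */
    /* no manual decrement needed on any return path */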
static inline struct journal_buf *
@@ -140,9 +138,9 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool stuck = false;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
- buf.atomic++;
+ guard(printbuf_atomic)(&buf);
if (!(error == -BCH_ERR_journal_full ||
error == -BCH_ERR_journal_pin_full) ||
@@ -150,28 +148,24 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
(flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim)
return stuck;
- spin_lock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ if (j->can_discard)
+ return stuck;
- if (j->can_discard) {
- spin_unlock(&j->lock);
- return stuck;
- }
+ stuck = true;
- stuck = true;
+ /*
+ * The journal shutdown path will set ->err_seq, but do it here first to
+ * serialize against concurrent failures and avoid duplicate error
+ * reports.
+ */
+ if (j->err_seq)
+ return stuck;
- /*
- * The journal shutdown path will set ->err_seq, but do it here first to
- * serialize against concurrent failures and avoid duplicate error
- * reports.
- */
- if (j->err_seq) {
- spin_unlock(&j->lock);
- return stuck;
- }
- j->err_seq = journal_cur_seq(j);
+ j->err_seq = journal_cur_seq(j);
- __bch2_journal_debug_to_text(&buf, j);
- spin_unlock(&j->lock);
+ __bch2_journal_debug_to_text(&buf, j);
+ }
prt_printf(&buf, bch2_fmt(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)"),
bch2_err_str(error));
bch2_print_str(c, KERN_ERR, buf.buf);
@@ -179,7 +173,6 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
printbuf_reset(&buf);
bch2_journal_pins_to_text(&buf, j);
bch_err(c, "Journal pins:\n%s", buf.buf);
- printbuf_exit(&buf);
bch2_fatal_error(c);
dump_stack();
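scoped_guard(spinlock, &j->lock) { ... } bounds the critical section to the braced block and unlocks on every exit path, which is what lets the early returns above shed their hand-written spin_unlock() calls. The two forms side by side (sketch):

    /* before */
    spin_lock(&j->lock);
    if (j->can_discard) {
            spin_unlock(&j->lock);
            return stuck;
    }
    spin_unlock(&j->lock);

    /* after */
    scoped_guard(spinlock, &j->lock) {
            if (j->can_discard)
                    return stuck;   /* unlock is implicit */
    }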
@@ -269,22 +262,21 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
if (trace_journal_entry_close_enabled() && trace) {
- struct printbuf pbuf = PRINTBUF;
- pbuf.atomic++;
-
- prt_str(&pbuf, "entry size: ");
- prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data));
- prt_newline(&pbuf);
- bch2_prt_task_backtrace(&pbuf, current, 1, GFP_NOWAIT);
- trace_journal_entry_close(c, pbuf.buf);
- printbuf_exit(&pbuf);
+ CLASS(printbuf, err)();
+ guard(printbuf_atomic)(&err);
+
+ prt_str(&err, "entry size: ");
+ prt_human_readable_u64(&err, vstruct_bytes(buf->data));
+ prt_newline(&err);
+ bch2_prt_task_backtrace(&err, current, 1, GFP_NOWAIT);
+ trace_journal_entry_close(c, err.buf);
}
sectors = vstruct_blocks_plus(buf->data, c->block_bits,
buf->u64s_reserved) << c->block_bits;
if (unlikely(sectors > buf->sectors)) {
- struct printbuf err = PRINTBUF;
- err.atomic++;
+ CLASS(printbuf, err)();
+ guard(printbuf_atomic)(&err);
prt_printf(&err, "journal entry overran reserved space: %u > %u\n",
sectors, buf->sectors);
@@ -296,7 +288,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
bch2_journal_halt_locked(j);
bch_err(c, "%s", err.buf);
- printbuf_exit(&err);
return;
}
@@ -344,9 +335,8 @@ void bch2_journal_halt_locked(struct journal *j)
void bch2_journal_halt(struct journal *j)
{
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
bch2_journal_halt_locked(j);
- spin_unlock(&j->lock);
}
static bool journal_entry_want_write(struct journal *j)
@@ -371,13 +361,8 @@ static bool journal_entry_want_write(struct journal *j)
bool bch2_journal_entry_close(struct journal *j)
{
- bool ret;
-
- spin_lock(&j->lock);
- ret = journal_entry_want_write(j);
- spin_unlock(&j->lock);
-
- return ret;
+ guard(spinlock)(&j->lock);
+ return journal_entry_want_write(j);
}
/*
@@ -394,7 +379,7 @@ static int journal_entry_open(struct journal *j)
lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j));
- BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
+ BUG_ON(c->sb.clean);
if (j->blocked)
return bch_err_throw(c, journal_blocked);
@@ -542,7 +527,7 @@ static void journal_write_work(struct work_struct *work)
{
struct journal *j = container_of(work, struct journal, write_work.work);
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
if (__journal_entry_is_open(j->reservations)) {
long delta = journal_cur_buf(j)->expires - jiffies;
@@ -551,7 +536,6 @@ static void journal_write_work(struct work_struct *work)
else
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
}
- spin_unlock(&j->lock);
}
static void journal_buf_prealloc(struct journal *j)
@@ -652,34 +636,32 @@ out:
if (ret == -BCH_ERR_journal_max_in_flight &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) &&
trace_journal_entry_full_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_printbuf_make_room(&buf, 4096);
- spin_lock(&j->lock);
- prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
- bch2_journal_bufs_to_text(&buf, j);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
+ bch2_journal_bufs_to_text(&buf, j);
+ }
trace_journal_entry_full(c, buf.buf);
- printbuf_exit(&buf);
count_event(c, journal_entry_full);
}
if (ret == -BCH_ERR_journal_max_open &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) &&
trace_journal_entry_full_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_printbuf_make_room(&buf, 4096);
- spin_lock(&j->lock);
- prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
- bch2_journal_bufs_to_text(&buf, j);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
+ bch2_journal_bufs_to_text(&buf, j);
+ }
trace_journal_entry_full(c, buf.buf);
- printbuf_exit(&buf);
count_event(c, journal_entry_full);
}
@@ -751,11 +733,10 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
remaining_wait))
return ret;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_journal_debug_to_text(&buf, j);
bch2_print_str(c, KERN_ERR, buf.buf);
prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret));
- printbuf_exit(&buf);
closure_wait_event(&j->async_wait,
!bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) ||
@@ -772,11 +753,13 @@ void bch2_journal_entry_res_resize(struct journal *j,
union journal_res_state state;
int d = new_u64s - res->u64s;
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
+
+ j->entry_u64s_reserved += d;
+ res->u64s += d;
- j->entry_u64s_reserved += d;
if (d <= 0)
- goto out;
+ return;
j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
state = READ_ONCE(j->reservations);
@@ -791,9 +774,6 @@ void bch2_journal_entry_res_resize(struct journal *j,
} else {
journal_cur_buf(j)->u64s_reserved += d;
}
-out:
- spin_unlock(&j->lock);
- res->u64s += d;
}
/* journal flushing: */
@@ -944,7 +924,6 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
u64 unwritten_seq;
- bool ret = false;
if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush)))
return false;
@@ -952,9 +931,10 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end)
if (c->journal.flushed_seq_ondisk >= start)
return false;
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
+
if (c->journal.flushed_seq_ondisk >= start)
- goto out;
+ return false;
for (unwritten_seq = journal_last_unwritten_seq(j);
unwritten_seq < end;
@@ -963,15 +943,12 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end)
/* journal flush already in flight, or flush requested */
if (buf->must_flush)
- goto out;
+ return false;
buf->noflush = true;
}
- ret = true;
-out:
- spin_unlock(&j->lock);
- return ret;
+ return true;
}
static int __bch2_journal_meta(struct journal *j)
@@ -1010,19 +987,18 @@ int bch2_journal_meta(struct journal *j)
void bch2_journal_unblock(struct journal *j)
{
- spin_lock(&j->lock);
- if (!--j->blocked &&
- j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL &&
- j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) {
- union journal_res_state old, new;
-
- old.v = atomic64_read(&j->reservations.counter);
- do {
- new.v = old.v;
- new.cur_entry_offset = j->cur_entry_offset_if_blocked;
- } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
- }
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock)
+ if (!--j->blocked &&
+ j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL &&
+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) {
+ union journal_res_state old, new;
+
+ old.v = atomic64_read(&j->reservations.counter);
+ do {
+ new.v = old.v;
+ new.cur_entry_offset = j->cur_entry_offset_if_blocked;
+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
+ }
journal_wake(j);
}
@@ -1050,9 +1026,8 @@ static void __bch2_journal_block(struct journal *j)
void bch2_journal_block(struct journal *j)
{
- spin_lock(&j->lock);
- __bch2_journal_block(j);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock)
+ __bch2_journal_block(j);
journal_quiesce(j);
}
@@ -1065,7 +1040,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
/* We're inside wait_event(), but using mutex_lock(): */
sched_annotate_sleep();
mutex_lock(&j->buf_lock);
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
max_seq = min(max_seq, journal_cur_seq(j));
for (u64 seq = journal_last_unwritten_seq(j);
@@ -1088,11 +1063,12 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open
? ERR_PTR(-EAGAIN)
: buf;
+ if (!ret)
+ smp_mb();
break;
}
}
- spin_unlock(&j->lock);
if (IS_ERR_OR_NULL(ret))
mutex_unlock(&j->buf_lock);
return ret;
@@ -1147,16 +1123,14 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
if (ret)
break;
- if (!new_fs) {
- ret = bch2_trans_run(c,
- bch2_trans_mark_metadata_bucket(trans, ca,
- ob[nr_got]->bucket, BCH_DATA_journal,
- ca->mi.bucket_size, BTREE_TRIGGER_transactional));
- if (ret) {
- bch2_open_bucket_put(c, ob[nr_got]);
- bch_err_msg(c, ret, "marking new journal buckets");
- break;
- }
+ CLASS(btree_trans, trans)(c);
+ ret = bch2_trans_mark_metadata_bucket(trans, ca,
+ ob[nr_got]->bucket, BCH_DATA_journal,
+ ca->mi.bucket_size, BTREE_TRIGGER_transactional);
+ if (ret) {
+ bch2_open_bucket_put(c, ob[nr_got]);
+ bch_err_msg(c, ret, "marking new journal buckets");
+ break;
}
bu[nr_got] = ob[nr_got]->bucket;
@@ -1226,12 +1200,13 @@ err_unblock:
mutex_unlock(&c->sb_lock);
}
- if (ret && !new_fs)
+ if (ret) {
+ CLASS(btree_trans, trans)(c);
for (i = 0; i < nr_got; i++)
- bch2_trans_run(c,
- bch2_trans_mark_metadata_bucket(trans, ca,
+ bch2_trans_mark_metadata_bucket(trans, ca,
bu[i], BCH_DATA_free, 0,
- BTREE_TRIGGER_transactional));
+ BTREE_TRIGGER_transactional);
+ }
err_free:
for (i = 0; i < nr_got; i++)
bch2_open_bucket_put(c, ob[i]);
@@ -1296,10 +1271,8 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca
int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
unsigned nr)
{
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false);
- up_write(&c->state_lock);
-
bch_err_fn(c, ret);
return ret;
}
@@ -1423,21 +1396,18 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
{
- bool ret = false;
- u64 seq;
+ guard(spinlock)(&j->lock);
- spin_lock(&j->lock);
- for (seq = journal_last_unwritten_seq(j);
- seq <= journal_cur_seq(j) && !ret;
+ for (u64 seq = journal_last_unwritten_seq(j);
+ seq <= journal_cur_seq(j);
seq++) {
struct journal_buf *buf = journal_seq_to_buf(j, seq);
if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx))
- ret = true;
+ return true;
}
- spin_unlock(&j->lock);
- return ret;
+ return false;
}
void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
@@ -1556,13 +1526,11 @@ int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq)
if (!had_entries)
j->last_empty_seq = cur_seq - 1; /* to match j->seq */
- spin_lock(&j->lock);
- j->last_flush_write = jiffies;
-
- j->reservations.idx = journal_cur_seq(j);
-
- c->last_bucket_seq_cleanup = journal_cur_seq(j);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ j->last_flush_write = jiffies;
+ j->reservations.idx = journal_cur_seq(j);
+ c->last_bucket_seq_cleanup = journal_cur_seq(j);
+ }
return 0;
}
@@ -1573,13 +1541,12 @@ void bch2_journal_set_replay_done(struct journal *j)
* journal_space_available must happen before setting JOURNAL_running
* JOURNAL_running must happen before JOURNAL_replay_done
*/
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
bch2_journal_space_available(j);
set_bit(JOURNAL_need_flush_write, &j->flags);
set_bit(JOURNAL_running, &j->flags);
set_bit(JOURNAL_replay_done, &j->flags);
- spin_unlock(&j->lock);
}
/* init/exit: */
@@ -1589,7 +1556,7 @@ void bch2_dev_journal_exit(struct bch_dev *ca)
struct journal_device *ja = &ca->journal;
for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) {
- kfree(ja->bio[i]);
+ kvfree(ja->bio[i]);
ja->bio[i] = NULL;
}
@@ -1626,7 +1593,16 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) {
- ja->bio[i] = kzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs,
+ /*
+ * kvzalloc() is not what we want to be using here:
+ * JOURNAL_ENTRY_SIZE_MAX is probably quite a bit bigger than it
+ * needs to be.
+ *
+ * But changing that will require performance testing -
+ * performance can be sensitive to anything that affects journal
+ * pipelining.
+ */
+ ja->bio[i] = kvzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs,
nr_bvecs), GFP_KERNEL);
if (!ja->bio[i])
return bch_err_throw(c, ENOMEM_dev_journal_init);
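kvzalloc() tries kmalloc() first and falls back to vmalloc() when the allocation is large or physically contiguous memory is fragmented, so this JOURNAL_ENTRY_SIZE_MAX-sized bio allocation no longer fails spuriously; the matching free must be kvfree(), which bch2_dev_journal_exit() above was switched to:

    ja->bio[i] = kvzalloc(size, GFP_KERNEL);   /* kmalloc, else vmalloc */
    ...
    kvfree(ja->bio[i]);                        /* handles either backing */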
@@ -1727,9 +1703,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 28);
- out->atomic++;
+ guard(printbuf_atomic)(out);
guard(rcu)();
+
s = READ_ONCE(j->reservations);
prt_printf(out, "flags:\t");
@@ -1819,13 +1796,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
}
prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required);
-
- --out->atomic;
}
void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
{
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
__bch2_journal_debug_to_text(out, j);
- spin_unlock(&j->lock);
}
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 977907038d98..c05aa94237f8 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -267,7 +267,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
{
union journal_res_state s;
- s.v = atomic64_sub_return(((union journal_res_state) {
+ s.v = atomic64_sub_return_release(((union journal_res_state) {
.buf0_count = idx == 0,
.buf1_count = idx == 1,
.buf2_count = idx == 2,
@@ -297,9 +297,8 @@ static inline void bch2_journal_buf_put(struct journal *j, u64 seq)
s = journal_state_buf_put(j, idx);
if (!journal_state_count(s, idx)) {
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
bch2_journal_buf_put_final(j, seq);
- spin_unlock(&j->lock);
} else if (unlikely(s.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL))
wake_up(&j->wait);
}
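atomic64_sub_return_release() weakens the fully-ordered atomic64_sub_return() to release semantics: writes to the journal buffer made before the reference drop are guaranteed visible to anyone who later observes the decremented count. One plausible reading (not stated by the patch) is that this pairs with the smp_mb() added in __bch2_next_write_buffer_flush_journal_buf(), which orders the count read against subsequent buffer reads:

    /* writer side */
    buf->data->u64s = cpu_to_le32(...);     /* buffer writes ... */
    journal_state_buf_put(j, idx);          /* ... published by the release */

    /* reader side (sketch) */
    if (!journal_state_count(s, idx))
            smp_mb();                       /* order count read vs. buffer reads */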
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 2d6ce4348a22..2835250a14c4 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -35,7 +35,8 @@ void bch2_journal_pos_from_member_info_set(struct bch_fs *c)
void bch2_journal_pos_from_member_info_resume(struct bch_fs *c)
{
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
+
for_each_member_device(c, ca) {
struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
@@ -46,16 +47,14 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *c)
if (offset <= ca->mi.bucket_size)
ca->journal.sectors_free = ca->mi.bucket_size - offset;
}
- mutex_unlock(&c->sb_lock);
}
static void bch2_journal_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct journal_ptr *p)
{
- struct bch_dev *ca = bch2_dev_tryget_noerror(c, p->dev);
+ CLASS(bch2_dev_tryget_noerror, ca)(c, p->dev);
prt_printf(out, "%s %u:%u:%u (sector %llu)",
ca ? ca->name : "(invalid dev)",
p->dev, p->bucket, p->bucket_offset, p->sector);
- bch2_dev_put(ca);
}
void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct journal_replay *j)
@@ -157,7 +156,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
struct journal_replay **_i, *i, *dup;
size_t bytes = vstruct_bytes(j);
u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = JOURNAL_ENTRY_ADD_OK;
if (last_seq && c->opts.journal_rewind)
@@ -223,7 +222,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
ret = darray_push(&dup->ptrs, entry_ptr);
if (ret)
- goto out;
+ return ret;
bch2_journal_replay_to_text(&buf, c, dup);
@@ -240,7 +239,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
if (entry_ptr.csum_good && !identical)
goto replace;
- goto out;
+ return ret;
}
replace:
i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
@@ -263,9 +262,7 @@ replace:
}
*_i = i;
-out:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -312,7 +309,7 @@ static void journal_entry_err_msg(struct printbuf *out,
#define journal_entry_err(c, version, jset, entry, _err, msg, ...) \
({ \
- struct printbuf _buf = PRINTBUF; \
+ CLASS(printbuf, _buf)(); \
\
journal_entry_err_msg(&_buf, version, jset, entry); \
prt_printf(&_buf, msg, ##__VA_ARGS__); \
@@ -331,7 +328,6 @@ static void journal_entry_err_msg(struct printbuf *out,
break; \
} \
\
- printbuf_exit(&_buf); \
true; \
})
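The recurring PRINTBUF-to-CLASS conversions in this file rely on the cleanup.h class machinery: CLASS(printbuf, buf)() declares a printbuf whose printbuf_exit() runs automatically on every exit path, which is what lets the goto-out/printbuf_exit() pairs disappear. Roughly how such a class is defined and used (the actual definition lives elsewhere in the bcachefs tree; this is the general <linux/cleanup.h> shape, not a verbatim quote):

    #include <linux/cleanup.h>

    /* constructor value: PRINTBUF; destructor: printbuf_exit() */
    DEFINE_CLASS(printbuf, struct printbuf,
                 printbuf_exit(&_T), PRINTBUF, void)

    static void demo(struct bch_fs *c)
    {
            CLASS(printbuf, buf)();         /* 'buf' is a struct printbuf */

            prt_printf(&buf, "hello %u", 42);
            bch_info(c, "%s", buf.buf);
    }                                       /* printbuf_exit(&buf) runs here */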
@@ -617,7 +613,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
- struct printbuf err = PRINTBUF;
+ CLASS(printbuf, err)();
int ret = 0;
if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -626,7 +622,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
journal_entry_data_usage_bad_size,
"invalid journal entry usage: bad size")) {
journal_entry_null_range(entry, vstruct_next(entry));
- goto out;
+ return 0;
}
if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err),
@@ -634,11 +630,9 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
journal_entry_data_usage_bad_size,
"invalid journal entry usage: %s", err.buf)) {
journal_entry_null_range(entry, vstruct_next(entry));
- goto out;
+ return 0;
}
-out:
fsck_err:
- printbuf_exit(&err);
return ret;
}
@@ -1165,17 +1159,16 @@ reread:
vstruct_end(j) - (void *) j->encrypted_start);
bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret));
- mutex_lock(&jlist->lock);
- ret = journal_entry_add(c, ca, (struct journal_ptr) {
- .csum_good = csum_good,
- .csum = csum,
- .dev = ca->dev_idx,
- .bucket = bucket,
- .bucket_offset = offset -
- bucket_to_sector(ca, ja->buckets[bucket]),
- .sector = offset,
- }, jlist, j);
- mutex_unlock(&jlist->lock);
+ scoped_guard(mutex, &jlist->lock)
+ ret = journal_entry_add(c, ca, (struct journal_ptr) {
+ .csum_good = csum_good,
+ .csum = csum,
+ .dev = ca->dev_idx,
+ .bucket = bucket,
+ .bucket_offset = offset -
+ bucket_to_sector(ca, ja->buckets[bucket]),
+ .sector = offset,
+ }, jlist, j);
switch (ret) {
case JOURNAL_ENTRY_ADD_OK:
@@ -1235,16 +1228,15 @@ out:
closure_return(cl);
return;
err:
- mutex_lock(&jlist->lock);
- jlist->ret = ret;
- mutex_unlock(&jlist->lock);
+ scoped_guard(mutex, &jlist->lock)
+ jlist->ret = ret;
goto out;
}
noinline_for_stack
static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j);
@@ -1271,7 +1263,6 @@ static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_r
prt_printf(&buf, "\n(had good copy on another device)");
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end)
@@ -1299,7 +1290,6 @@ struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u
noinline_for_stack
static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq)
{
- struct printbuf buf = PRINTBUF;
int ret = 0;
struct genradix_iter radix_iter;
@@ -1318,7 +1308,7 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e
struct u64_range missing;
while ((missing = bch2_journal_entry_missing_range(c, seq, le64_to_cpu(i->j.seq))).start) {
- printbuf_reset(&buf);
+ CLASS(printbuf, buf)();
prt_printf(&buf, "journal entries %llu-%llu missing! (replaying %llu-%llu)",
missing.start, missing.end - 1,
start_seq, end_seq);
@@ -1342,7 +1332,6 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e
seq = le64_to_cpu(i->j.seq) + 1;
}
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -1354,7 +1343,6 @@ int bch2_journal_read(struct bch_fs *c,
struct journal_list jlist;
struct journal_replay *i, **_i;
struct genradix_iter radix_iter;
- struct printbuf buf = PRINTBUF;
bool degraded = false, last_write_torn = false;
u64 seq;
int ret = 0;
@@ -1443,24 +1431,27 @@ int bch2_journal_read(struct bch_fs *c,
return 0;
}
- printbuf_reset(&buf);
- prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
- *last_seq, *blacklist_seq - 1);
-
- /*
- * Drop blacklisted entries and entries older than last_seq (or start of
- * journal rewind:
- */
u64 drop_before = *last_seq;
- if (c->opts.journal_rewind) {
- drop_before = min(drop_before, c->opts.journal_rewind);
- prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind);
+ {
+ CLASS(printbuf, buf)();
+ prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
+ *last_seq, *blacklist_seq - 1);
+
+ /*
+ * Drop blacklisted entries and entries older than last_seq (or start of
+ * journal rewind:
+ */
+ if (c->opts.journal_rewind) {
+ drop_before = min(drop_before, c->opts.journal_rewind);
+ prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind);
+ }
+
+ *last_seq = drop_before;
+ if (*start_seq != *blacklist_seq)
+ prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
+ bch_info(c, "%s", buf.buf);
}
- *last_seq = drop_before;
- if (*start_seq != *blacklist_seq)
- prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
- bch_info(c, "%s", buf.buf);
genradix_for_each(&c->journal_entries, radix_iter, _i) {
i = *_i;
@@ -1483,7 +1474,7 @@ int bch2_journal_read(struct bch_fs *c,
ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1);
if (ret)
- goto err;
+ return ret;
genradix_for_each(&c->journal_entries, radix_iter, _i) {
union bch_replicas_padded replicas = {
@@ -1512,14 +1503,14 @@ int bch2_journal_read(struct bch_fs *c,
i->ptrs.data[0].sector,
READ);
if (ret)
- goto err;
+ return ret;
darray_for_each(i->ptrs, ptr)
replicas_entry_add_dev(&replicas.e, ptr->dev);
bch2_replicas_entry_sort(&replicas.e);
- printbuf_reset(&buf);
+ CLASS(printbuf, buf)();
bch2_replicas_entry_to_text(&buf, &replicas.e);
if (!degraded &&
@@ -1530,12 +1521,10 @@ int bch2_journal_read(struct bch_fs *c,
le64_to_cpu(i->j.seq), buf.buf))) {
ret = bch2_mark_replicas(c, &replicas.e);
if (ret)
- goto err;
+ return ret;
}
}
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -1695,10 +1684,10 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
memcpy(new_buf, buf->data, buf->buf_size);
- spin_lock(&j->lock);
- swap(buf->data, new_buf);
- swap(buf->buf_size, new_size);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ swap(buf->data, new_buf);
+ swap(buf->buf_size, new_size);
+ }
kvfree(new_buf);
}
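journal_buf_realloc() above shows a common resize idiom: publish the new buffer while holding the lock, so concurrent readers see either the old or the new buffer and never a half-switched state, then free the displaced buffer after the lock is dropped, since kvfree() may hit vfree(), which can sleep. A sketch with hypothetical names:

    #include <linux/spinlock.h>
    #include <linux/mm.h>       /* kvfree() */
    #include <linux/minmax.h>   /* swap() */

    struct demo_buf {
            spinlock_t      lock;
            void            *buf;
            size_t          buf_size;
    };

    static void demo_replace(struct demo_buf *d, void *new_buf, size_t new_size)
    {
            scoped_guard(spinlock, &d->lock) {
                    swap(d->buf, new_buf);          /* publish under the lock */
                    swap(d->buf_size, new_size);
            }

            kvfree(new_buf);    /* now the *old* buffer; may sleep, so free unlocked */
    }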
@@ -1725,7 +1714,7 @@ static CLOSURE_CALLBACK(journal_write_done)
}
if (err && !bch2_journal_error(j)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
if (err == -BCH_ERR_journal_write_err)
@@ -1737,7 +1726,6 @@ static CLOSURE_CALLBACK(journal_write_done)
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
closure_debug_destroy(cl);
@@ -1780,6 +1768,7 @@ static CLOSURE_CALLBACK(journal_write_done)
closure_wake_up(&c->freelist_wait);
bch2_reset_alloc_cursors(c);
+ do_discards = true;
}
j->seq_ondisk = seq;
@@ -1878,7 +1867,11 @@ static CLOSURE_CALLBACK(journal_write_submit)
jbio->submit_time = local_clock();
- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
+ /*
+ * blk-wbt.c throttles all writes except those that have both
+ * REQ_SYNC and REQ_IDLE set...
+ */
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_IDLE|REQ_META);
bio->bi_iter.bi_sector = ptr->offset;
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
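For reference, the blk-wbt behaviour the new comment describes: writeback throttling exempts a write only when REQ_SYNC and REQ_IDLE are both set. The check in block/blk-wbt.c looks roughly like this (a paraphrase of the upstream source, not quoted from this patch):

    /* paraphrase of wbt_should_throttle() in block/blk-wbt.c */
    static bool demo_wbt_should_throttle(struct bio *bio)
    {
            if (bio_op(bio) == REQ_OP_WRITE &&
                (bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE))
                    return false;   /* O_DIRECT-style writes are exempt */

            return bio_op(bio) == REQ_OP_WRITE || bio_op(bio) == REQ_OP_DISCARD;
    }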
@@ -2018,9 +2011,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
}
}
- spin_lock(&c->journal.lock);
- w->need_flush_to_write_buffer = false;
- spin_unlock(&c->journal.lock);
+ scoped_guard(spinlock, &c->journal.lock)
+ w->need_flush_to_write_buffer = false;
start = end = vstruct_last(jset);
@@ -2158,21 +2150,21 @@ CLOSURE_CALLBACK(bch2_journal_write)
j->write_start_time = local_clock();
- spin_lock(&j->lock);
- if (nr_rw_members > 1)
- w->separate_flush = true;
+ scoped_guard(spinlock, &j->lock) {
+ if (nr_rw_members > 1)
+ w->separate_flush = true;
- ret = bch2_journal_write_pick_flush(j, w);
- spin_unlock(&j->lock);
+ ret = bch2_journal_write_pick_flush(j, w);
+ }
if (unlikely(ret))
goto err;
- mutex_lock(&j->buf_lock);
- journal_buf_realloc(j, w);
+ scoped_guard(mutex, &j->buf_lock) {
+ journal_buf_realloc(j, w);
- ret = bch2_journal_write_prep(j, w);
- mutex_unlock(&j->buf_lock);
+ ret = bch2_journal_write_prep(j, w);
+ }
if (unlikely(ret))
goto err;
@@ -2193,22 +2185,22 @@ CLOSURE_CALLBACK(bch2_journal_write)
if (unlikely(ret))
goto err;
- spin_lock(&j->lock);
- /*
- * write is allocated, no longer need to account for it in
- * bch2_journal_space_available():
- */
- w->sectors = 0;
- w->write_allocated = true;
- j->entry_bytes_written += vstruct_bytes(w->data);
+ scoped_guard(spinlock, &j->lock) {
+ /*
+ * write is allocated, no longer need to account for it in
+ * bch2_journal_space_available():
+ */
+ w->sectors = 0;
+ w->write_allocated = true;
+ j->entry_bytes_written += vstruct_bytes(w->data);
- /*
- * journal entry has been compacted and allocated, recalculate space
- * available:
- */
- bch2_journal_space_available(j);
- bch2_journal_do_writes(j);
- spin_unlock(&j->lock);
+ /*
+ * journal entry has been compacted and allocated, recalculate space
+ * available:
+ */
+ bch2_journal_space_available(j);
+ bch2_journal_do_writes(j);
+ }
w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
@@ -2232,7 +2224,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
return;
err_allocate_write:
if (!bch2_journal_error(j)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_journal_debug_to_text(&buf, j);
prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
@@ -2240,7 +2232,6 @@ err_allocate_write:
vstruct_sectors(w->data, c->block_bits),
bch2_err_str(ret));
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
err:
bch2_fatal_error(c);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index cd6201741c59..f23e5ee9ad75 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
return (struct journal_space) { 0, 0 };
/*
+ * It's possible for bucket size to be misaligned w.r.t. the filesystem
+ * block size:
+ */
+ min_bucket_size = round_down(min_bucket_size, block_sectors(c));
+
+ /*
* We sorted largest to smallest, and we want the smallest out of the
* @nr_devs_want largest devices:
*/
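The new round_down() clamps the usable bucket size to a whole number of filesystem blocks. round_down(x, y) from <linux/math.h> is x & ~(y - 1) and requires y to be a power of two, which the block size is. A worked example with hypothetical sizes:

    #include <linux/math.h>

    static unsigned demo_usable_sectors(unsigned bucket_size, unsigned block_sectors)
    {
            /* e.g. round_down(1023, 8) == 1016: the 7-sector tail can't hold a block */
            return round_down(bucket_size, block_sectors);
    }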
@@ -215,8 +221,8 @@ void bch2_journal_space_available(struct journal *j)
if (nr_online < metadata_replicas_required(c)) {
if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
+ CLASS(printbuf, buf)();
+ guard(printbuf_atomic)(&buf);
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
"rw journal devs:", nr_online, metadata_replicas_required(c));
@@ -224,7 +230,6 @@ void bch2_journal_space_available(struct journal *j)
prt_printf(&buf, " %s", ca->name);
bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
ret = bch_err_throw(c, insufficient_journal_devices);
goto out;
@@ -274,11 +279,8 @@ static bool __should_discard_bucket(struct journal *j, struct journal_device *ja
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
{
- spin_lock(&j->lock);
- bool ret = __should_discard_bucket(j, ja);
- spin_unlock(&j->lock);
-
- return ret;
+ guard(spinlock)(&j->lock);
+ return __should_discard_bucket(j, ja);
}
/*
@@ -289,7 +291,7 @@ void bch2_journal_do_discards(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- mutex_lock(&j->discard_lock);
+ guard(mutex)(&j->discard_lock);
for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) {
struct journal_device *ja = &ca->journal;
@@ -303,15 +305,12 @@ void bch2_journal_do_discards(struct journal *j)
ja->buckets[ja->discard_idx]),
ca->mi.bucket_size, GFP_NOFS);
- spin_lock(&j->lock);
- ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
-
- bch2_journal_space_available(j);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+ bch2_journal_space_available(j);
+ }
}
}
-
- mutex_unlock(&j->discard_lock);
}
/*
@@ -352,9 +351,8 @@ bool __bch2_journal_pin_put(struct journal *j, u64 seq)
void bch2_journal_pin_put(struct journal *j, u64 seq)
{
if (__bch2_journal_pin_put(j, seq)) {
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
bch2_journal_reclaim_fast(j);
- spin_unlock(&j->lock);
}
}
@@ -387,10 +385,9 @@ static inline bool __journal_pin_drop(struct journal *j,
void bch2_journal_pin_drop(struct journal *j,
struct journal_entry_pin *pin)
{
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
if (__journal_pin_drop(j, pin))
bch2_journal_reclaim_fast(j);
- spin_unlock(&j->lock);
}
static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
@@ -437,7 +434,7 @@ void bch2_journal_pin_copy(struct journal *j,
struct journal_entry_pin *src,
journal_pin_flush_fn flush_fn)
{
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
u64 seq = READ_ONCE(src->seq);
@@ -448,7 +445,6 @@ void bch2_journal_pin_copy(struct journal *j,
* longer to exist, but that means there's no longer anything to
* copy and we can bail out here:
*/
- spin_unlock(&j->lock);
return;
}
@@ -465,31 +461,32 @@ void bch2_journal_pin_copy(struct journal *j,
*/
if (seq == journal_last_seq(j))
journal_wake(j);
- spin_unlock(&j->lock);
}
void bch2_journal_pin_set(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)
{
- spin_lock(&j->lock);
+ bool wake;
- BUG_ON(seq < journal_last_seq(j));
+ scoped_guard(spinlock, &j->lock) {
+ BUG_ON(seq < journal_last_seq(j));
- bool reclaim = __journal_pin_drop(j, pin);
+ bool reclaim = __journal_pin_drop(j, pin);
- bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
+ bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
- if (reclaim)
- bch2_journal_reclaim_fast(j);
- /*
- * If the journal is currently full, we might want to call flush_fn
- * immediately:
- */
- if (seq == journal_last_seq(j))
- journal_wake(j);
+ if (reclaim)
+ bch2_journal_reclaim_fast(j);
+ /*
+ * If the journal is currently full, we might want to call flush_fn
+ * immediately:
+ */
+ wake = seq == journal_last_seq(j);
+ }
- spin_unlock(&j->lock);
+ if (wake)
+ journal_wake(j);
}
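The bch2_journal_pin_set() rewrite also hoists the wakeup out of the critical section: the condition is evaluated under the lock, but journal_wake() runs after the guard scope ends, so woken waiters don't immediately contend on a lock we still hold. Minimal sketch with hypothetical names:

    #include <linux/cleanup.h>
    #include <linux/spinlock.h>
    #include <linux/wait.h>

    static DEFINE_SPINLOCK(demo_lock);
    static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
    static int demo_seq;

    static void demo_update(int seq)
    {
            bool wake;

            scoped_guard(spinlock, &demo_lock) {
                    wake = seq == demo_seq;         /* decide under the lock... */
                    demo_seq = seq;
            }

            if (wake)
                    wake_up(&demo_wq);              /* ...wake after dropping it */
    }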
/**
@@ -574,17 +571,17 @@ static size_t journal_flush_pins(struct journal *j,
j->last_flushed = jiffies;
- spin_lock(&j->lock);
- pin = journal_get_next_pin(j, seq_to_flush,
- allowed_below,
- allowed_above, &seq);
- if (pin) {
- BUG_ON(j->flush_in_progress);
- j->flush_in_progress = pin;
- j->flush_in_progress_dropped = false;
- flush_fn = pin->flush;
+ scoped_guard(spinlock, &j->lock) {
+ pin = journal_get_next_pin(j, seq_to_flush,
+ allowed_below,
+ allowed_above, &seq);
+ if (pin) {
+ BUG_ON(j->flush_in_progress);
+ j->flush_in_progress = pin;
+ j->flush_in_progress_dropped = false;
+ flush_fn = pin->flush;
+ }
}
- spin_unlock(&j->lock);
if (!pin)
break;
@@ -597,13 +594,13 @@ static size_t journal_flush_pins(struct journal *j,
err = flush_fn(j, pin, seq);
- spin_lock(&j->lock);
- /* Pin might have been dropped or rearmed: */
- if (likely(!err && !j->flush_in_progress_dropped))
- list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
- j->flush_in_progress = NULL;
- j->flush_in_progress_dropped = false;
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ /* Pin might have been dropped or rearmed: */
+ if (likely(!err && !j->flush_in_progress_dropped))
+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
+ j->flush_in_progress = NULL;
+ j->flush_in_progress_dropped = false;
+ }
wake_up(&j->pin_flush_wait);
@@ -764,9 +761,8 @@ static int bch2_journal_reclaim_thread(void *arg)
j->reclaim_kicked = false;
- mutex_lock(&j->reclaim_lock);
- ret = __bch2_journal_reclaim(j, false, kicked);
- mutex_unlock(&j->reclaim_lock);
+ scoped_guard(mutex, &j->reclaim_lock)
+ ret = __bch2_journal_reclaim(j, false, kicked);
now = jiffies;
delay = msecs_to_jiffies(c->opts.journal_reclaim_delay);
@@ -782,9 +778,8 @@ static int bch2_journal_reclaim_thread(void *arg)
if (j->reclaim_kicked)
break;
- spin_lock(&j->lock);
- journal_empty = fifo_empty(&j->pin);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock)
+ journal_empty = fifo_empty(&j->pin);
long timeout = j->next_reclaim - jiffies;
@@ -838,10 +833,10 @@ int bch2_journal_reclaim_start(struct journal *j)
static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush,
unsigned types)
{
+ guard(spinlock)(&j->lock);
+
struct journal_entry_pin_list *pin_list;
u64 seq;
-
- spin_lock(&j->lock);
fifo_for_each_entry_ptr(pin_list, &j->pin, seq) {
if (seq > seq_to_flush)
break;
@@ -849,12 +844,9 @@ static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush,
for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++)
if ((BIT(i) & types) &&
(!list_empty(&pin_list->unflushed[i]) ||
- !list_empty(&pin_list->flushed[i]))) {
- spin_unlock(&j->lock);
+ !list_empty(&pin_list->flushed[i])))
return true;
- }
}
- spin_unlock(&j->lock);
return false;
}
@@ -875,32 +867,54 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
if (ret)
return ret;
- mutex_lock(&j->reclaim_lock);
+ guard(mutex)(&j->reclaim_lock);
for (int type = JOURNAL_PIN_TYPE_NR - 1;
type >= 0;
--type)
if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
*did_work = true;
- goto unlock;
+
+ /*
+ * Question from Dan Carpenter, on the early return:
+ *
+ * If journal_flush_pins_or_still_flushing() returns
+ * true, then the flush hasn't completed and we must
+ * return 0; we want the outer closure_wait_event() in
+ * journal_flush_pins() to continue.
+ *
+ * The early return is there because we don't want to
+ * call journal_entry_close() until we've finished
+ * flushing all outstanding journal pins - otherwise
+ * seq_to_flush can be U64_MAX, and we'll close a bunch
+ * of journal entries and write tiny ones completely
+ * unnecessarily.
+ *
+ * Having the early return be in the loop where we loop
+ * over types is important, because flushing one journal
+ * pin can cause new journal pins to be added (even of
+ * the same type, btree node writes may generate more
+ * btree node writes, when updating the parent pointer
+ * hits a full node and has to trigger a split/compact).
+ *
+ * This is part of our shutdown sequence, where the order
+ * of flushing is important to ensure that it
+ * terminates...
+ */
+ return 0;
}
if (seq_to_flush > journal_cur_seq(j))
bch2_journal_entry_close(j);
- spin_lock(&j->lock);
/*
* If journal replay hasn't completed, the unreplayed journal entries
* hold refs on their corresponding sequence numbers
*/
+ guard(spinlock)(&j->lock);
ret = !test_bit(JOURNAL_replay_done, &j->flags) ||
journal_last_seq(j) > seq_to_flush ||
!fifo_used(&j->pin);
-
- spin_unlock(&j->lock);
-unlock:
- mutex_unlock(&j->reclaim_lock);
-
return ret;
}
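Context for the 'return 0' discussed in the comment above: journal_flush_done() is the condition of a closure_wait_event() loop in bch2_journal_flush_pins(), so a false/0 result simply means "not done yet, sleep and re-evaluate". The general shape of such a condition helper (hypothetical names):

    #include <linux/mutex.h>
    #include <linux/wait.h>

    struct demo_state {
            struct mutex            lock;
            int                     in_flight;
            wait_queue_head_t       wq;
    };

    /* false means "not done yet": the caller's wait loop sleeps and retries */
    static bool demo_flush_done(struct demo_state *s)
    {
            guard(mutex)(&s->lock);
            return s->in_flight == 0;
    }

    static void demo_flush_wait(struct demo_state *s)
    {
            /* whoever drops in_flight to zero does wake_up(&s->wq) */
            wait_event(s->wq, demo_flush_done(s));
    }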
@@ -925,13 +939,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
u64 iter, seq = 0;
int ret = 0;
- spin_lock(&j->lock);
- fifo_for_each_entry_ptr(p, &j->pin, iter)
- if (dev_idx >= 0
- ? bch2_dev_list_has_dev(p->devs, dev_idx)
- : p->devs.nr < c->opts.metadata_replicas)
- seq = iter;
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock)
+ fifo_for_each_entry_ptr(p, &j->pin, iter)
+ if (dev_idx >= 0
+ ? bch2_dev_list_has_dev(p->devs, dev_idx)
+ : p->devs.nr < c->opts.metadata_replicas)
+ seq = iter;
bch2_journal_flush_pins(j, seq);
@@ -939,7 +952,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
if (ret)
return ret;
- mutex_lock(&c->replicas_gc_lock);
+ guard(mutex)(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
/*
@@ -954,29 +967,25 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
goto err;
seq = 0;
- spin_lock(&j->lock);
- while (!ret) {
- union bch_replicas_padded replicas;
+ scoped_guard(spinlock, &j->lock)
+ while (!ret) {
+ union bch_replicas_padded replicas;
- seq = max(seq, journal_last_seq(j));
- if (seq >= j->pin.back)
- break;
- bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
- journal_seq_pin(j, seq)->devs);
- seq++;
+ seq = max(seq, journal_last_seq(j));
+ if (seq >= j->pin.back)
+ break;
+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+ journal_seq_pin(j, seq)->devs);
+ seq++;
- if (replicas.e.nr_devs) {
- spin_unlock(&j->lock);
- ret = bch2_mark_replicas(c, &replicas.e);
- spin_lock(&j->lock);
+ if (replicas.e.nr_devs) {
+ spin_unlock(&j->lock);
+ ret = bch2_mark_replicas(c, &replicas.e);
+ spin_lock(&j->lock);
+ }
}
- }
- spin_unlock(&j->lock);
err:
- ret = bch2_replicas_gc_end(c, ret);
- mutex_unlock(&c->replicas_gc_lock);
-
- return ret;
+ return bch2_replicas_gc_end(c, ret);
}
bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
@@ -984,20 +993,16 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
- spin_lock(&j->lock);
- if (!test_bit(JOURNAL_running, &j->flags)) {
- spin_unlock(&j->lock);
+ guard(spinlock)(&j->lock);
+ guard(printbuf_atomic)(out);
+
+ if (!test_bit(JOURNAL_running, &j->flags))
return true;
- }
*seq = max(*seq, j->pin.front);
- if (*seq >= j->pin.back) {
- spin_unlock(&j->lock);
+ if (*seq >= j->pin.back)
return true;
- }
-
- out->atomic++;
pin_list = journal_seq_pin(j, *seq);
@@ -1016,9 +1021,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
printbuf_indent_sub(out, 2);
- --out->atomic;
- spin_unlock(&j->lock);
-
return false;
}
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index 6361809b5e2e..399db5b77d9f 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -49,7 +49,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
unsigned i = 0, nr;
int ret = 0;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
nr = blacklist_nr_entries(bl);
@@ -77,10 +77,8 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
sb_blacklist_u64s(nr + 1));
- if (!bl) {
- ret = bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist);
- goto out;
- }
+ if (!bl)
+ return bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist);
array_insert_item(bl->start, nr, i, ((struct journal_seq_blacklist_entry) {
.start = cpu_to_le64(start),
@@ -89,8 +87,6 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
ret = bch2_write_super(c);
-out:
- mutex_unlock(&c->sb_lock);
return ret ?: bch2_blacklist_table_initialize(c);
}
diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
index 75f27ec26f85..0367ea37e857 100644
--- a/fs/bcachefs/logged_ops.c
+++ b/fs/bcachefs/logged_ops.c
@@ -35,7 +35,7 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
{
struct bch_fs *c = trans->c;
u32 restart_count = trans->restart_count;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags),
@@ -56,21 +56,18 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_buf_exit(&sk, c);
fsck_err:
- printbuf_exit(&buf);
return ret ?: trans_was_restarted(trans, restart_count);
}
int bch2_resume_logged_ops(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_max(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_max(trans, iter,
BTREE_ID_logged_ops,
POS(LOGGED_OPS_INUM_logged_ops, 0),
POS(LOGGED_OPS_INUM_logged_ops, U64_MAX),
BTREE_ITER_prefetch, k,
- resume_logged_op(trans, &iter, k)));
- bch_err_fn(c, ret);
- return ret;
+ resume_logged_op(trans, &iter, k));
}
static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
@@ -107,12 +104,11 @@ int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
*/
if (ret) {
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
bch2_fs_fatal_error(c, "deleting logged operation %s: %s",
buf.buf, bch2_err_str(ret));
- printbuf_exit(&buf);
}
return ret;
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 57b5b3263b08..ee14656c3fdd 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -86,7 +86,7 @@ int bch2_lru_check_set(struct btree_trans *trans,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct btree_iter lru_iter;
struct bkey_s_c lru_k =
bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
@@ -112,7 +112,6 @@ int bch2_lru_check_set(struct btree_trans *trans,
err:
fsck_err:
bch2_trans_iter_exit(trans, &lru_iter);
- printbuf_exit(&buf);
return ret;
}
@@ -166,8 +165,8 @@ static int bch2_check_lru_key(struct btree_trans *trans,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
+ CLASS(printbuf, buf1)();
+ CLASS(printbuf, buf2)();
struct bbpos bp = lru_pos_to_bp(lru_k);
@@ -198,8 +197,6 @@ static int bch2_check_lru_key(struct btree_trans *trans,
err:
fsck_err:
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
return ret;
}
@@ -210,14 +207,13 @@ int bch2_check_lrus(struct bch_fs *c)
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_lru_key(trans, &iter, k, &last_flushed)));
+ bch2_check_lru_key(trans, &iter, k, &last_flushed));
bch2_bkey_buf_exit(&last_flushed, c);
- bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index f296cce95338..bd1e54e0efd5 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -119,34 +119,29 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
unsigned dev_idx, unsigned flags)
{
- struct btree_trans *trans = bch2_trans_get(c);
- enum btree_id id;
- int ret = 0;
+ CLASS(btree_trans, trans)(c);
- for (id = 0; id < BTREE_ID_NR; id++) {
+ for (unsigned id = 0; id < BTREE_ID_NR; id++) {
if (!btree_type_has_ptrs(id))
continue;
- ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
+ int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
bch2_progress_update_iter(trans, progress, &iter, "dropping user data");
bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
}));
if (ret)
- break;
+ return ret;
}
- bch2_trans_put(trans);
-
- return ret;
+ return 0;
}
static int bch2_dev_metadata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
unsigned dev_idx, unsigned flags)
{
- struct btree_trans *trans;
struct btree_iter iter;
struct closure cl;
struct btree *b;
@@ -158,7 +153,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
if (flags & BCH_FORCE_IF_METADATA_LOST)
return bch_err_throw(c, remove_with_metadata_missing_unimplemented);
- trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
bch2_bkey_buf_init(&k);
closure_init_stack(&cl);
@@ -199,7 +194,6 @@ next:
ret = 0;
err:
bch2_bkey_buf_exit(&k, c);
- bch2_trans_put(trans);
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
@@ -240,7 +234,7 @@ out:
int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bkey_buf last_flushed;
bch2_bkey_buf_init(&last_flushed);
@@ -260,7 +254,6 @@ int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsig
}));
bch2_bkey_buf_exit(&last_flushed, trans->c);
- bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index eec591e947bd..3f44bb54f91a 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -54,22 +54,20 @@ trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
prt_newline(&buf);
bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
trace_io_move(c, buf.buf);
- printbuf_exit(&buf);
}
static noinline void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
trace_io_move_read(c, buf.buf);
- printbuf_exit(&buf);
}
static noinline void
@@ -78,7 +76,7 @@ trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k,
struct data_update_opts *data_opts,
move_pred_fn pred, void *_arg, bool p)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "%ps: %u", pred, p);
@@ -92,7 +90,6 @@ trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k,
prt_newline(&buf);
bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
trace_io_move_pred(c, buf.buf);
- printbuf_exit(&buf);
}
static noinline void
@@ -128,10 +125,9 @@ static void move_free(struct moving_io *io)
if (io->b)
atomic_dec(&io->b->count);
- mutex_lock(&ctxt->lock);
- list_del(&io->io_list);
+ scoped_guard(mutex, &ctxt->lock)
+ list_del(&io->io_list);
wake_up(&ctxt->wait);
- mutex_unlock(&ctxt->lock);
if (!io->write.data_opts.scrub) {
bch2_data_update_exit(&io->write);
@@ -150,11 +146,9 @@ static void move_write_done(struct bch_write_op *op)
if (op->error) {
if (trace_io_move_write_fail_enabled()) {
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
bch2_write_op_to_text(&buf, op);
trace_io_move_write_fail(c, buf.buf);
- printbuf_exit(&buf);
}
this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]);
@@ -203,11 +197,9 @@ static void move_write(struct moving_io *io)
}
if (trace_io_move_write_enabled()) {
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
trace_io_move_write(c, buf.buf);
- printbuf_exit(&buf);
}
closure_get(&io->write.ctxt->cl);
@@ -276,9 +268,8 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt)
EBUG_ON(atomic_read(&ctxt->read_sectors));
EBUG_ON(atomic_read(&ctxt->read_ios));
- mutex_lock(&c->moving_context_lock);
- list_del(&ctxt->list);
- mutex_unlock(&c->moving_context_lock);
+ scoped_guard(mutex, &c->moving_context_lock)
+ list_del(&ctxt->list);
/*
* Generally, releasing a transaction within a transaction restart means
@@ -314,9 +305,8 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt,
INIT_LIST_HEAD(&ctxt->ios);
init_waitqueue_head(&ctxt->wait);
- mutex_lock(&c->moving_context_lock);
- list_add(&ctxt->list, &c->moving_context_list);
- mutex_unlock(&c->moving_context_lock);
+ scoped_guard(mutex, &c->moving_context_lock)
+ list_add(&ctxt->list, &c->moving_context_list);
}
void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
@@ -412,13 +402,13 @@ int bch2_move_extent(struct moving_context *ctxt,
if (trace_io_move_read_enabled())
trace_io_move_read2(c, k);
- mutex_lock(&ctxt->lock);
- atomic_add(io->read_sectors, &ctxt->read_sectors);
- atomic_inc(&ctxt->read_ios);
+ scoped_guard(mutex, &ctxt->lock) {
+ atomic_add(io->read_sectors, &ctxt->read_sectors);
+ atomic_inc(&ctxt->read_ios);
- list_add_tail(&io->read_list, &ctxt->reads);
- list_add_tail(&io->io_list, &ctxt->ios);
- mutex_unlock(&ctxt->lock);
+ list_add_tail(&io->read_list, &ctxt->reads);
+ list_add_tail(&io->io_list, &ctxt->ios);
+ }
/*
* dropped by move_read_endio() - guards against use after free of
@@ -443,13 +433,11 @@ err:
count_event(c, io_move_start_fail);
if (trace_io_move_start_fail_enabled()) {
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
prt_str(&buf, ": ");
prt_str(&buf, bch2_err_str(ret));
trace_io_move_start_fail(c, buf.buf);
- printbuf_exit(&buf);
}
if (bch2_err_matches(ret, BCH_ERR_data_update_done))
@@ -468,7 +456,7 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
struct bch_io_opts *opts_ret = &io_opts->fs_io_opts;
int ret = 0;
- if (extent_iter->min_depth)
+ if (btree_iter_path(trans, extent_iter)->level)
return opts_ret;
if (extent_k.k->type == KEY_TYPE_reflink_v)
@@ -672,8 +660,7 @@ retry_root:
k = bkey_i_to_s_c(&b->key);
- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts,
- iter.pos, &iter, k);
+ io_opts = &snapshot_io_opts.fs_io_opts;
ret = PTR_ERR_OR_ZERO(io_opts);
if (ret)
goto root_err;
@@ -875,7 +862,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
u64 check_mismatch_done = bucket_start;
int ret = 0;
- struct bch_dev *ca = bch2_dev_tryget(c, dev);
+ CLASS(bch2_dev_tryget, ca)(c, dev);
if (!ca)
return 0;
@@ -1014,7 +1001,6 @@ err:
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&sk, c);
bch2_bkey_buf_exit(&last_flushed, c);
- bch2_dev_put(ca);
return ret;
}
@@ -1031,9 +1017,9 @@ int bch2_move_data_phys(struct bch_fs *c,
{
struct moving_context ctxt;
- bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));
-
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
+ bch2_btree_write_buffer_flush_sync(ctxt.trans);
+
if (ctxt.stats) {
ctxt.stats->phys = true;
ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
@@ -1268,12 +1254,11 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
BBPOS_MAX,
rewrite_old_nodes_pred, c, stats);
if (!ret) {
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
c->disk_sb.sb->version_min = c->disk_sb.sb->version;
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
bch_err_fn(c, ret);
@@ -1343,18 +1328,18 @@ static bool scrub_pred(struct bch_fs *c, void *_arg,
int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats,
- struct bch_ioctl_data op)
+ struct bch_ioctl_data *op)
{
- struct bbpos start = BBPOS(op.start_btree, op.start_pos);
- struct bbpos end = BBPOS(op.end_btree, op.end_pos);
+ struct bbpos start = BBPOS(op->start_btree, op->start_pos);
+ struct bbpos end = BBPOS(op->end_btree, op->end_pos);
int ret = 0;
- if (op.op >= BCH_DATA_OP_NR)
+ if (op->op >= BCH_DATA_OP_NR)
return -EINVAL;
- bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);
+ bch2_move_stats_init(stats, bch2_data_ops_strs[op->op]);
- switch (op.op) {
+ switch (op->op) {
case BCH_DATA_OP_scrub:
/*
* prevent tests from spuriously failing, make sure we see all
@@ -1362,13 +1347,13 @@ int bch2_data_job(struct bch_fs *c,
*/
bch2_btree_interior_updates_flush(c);
- ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
- op.scrub.data_types,
+ ret = bch2_move_data_phys(c, op->scrub.dev, 0, U64_MAX,
+ op->scrub.data_types,
NULL,
stats,
writepoint_hashed((unsigned long) current),
false,
- scrub_pred, &op) ?: ret;
+ scrub_pred, op) ?: ret;
break;
case BCH_DATA_OP_rereplicate:
@@ -1385,18 +1370,18 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_migrate:
- if (op.migrate.dev >= c->sb.nr_devices)
+ if (op->migrate.dev >= c->sb.nr_devices)
return -EINVAL;
stats->data_type = BCH_DATA_journal;
- ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
- ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX,
+ ret = bch2_journal_flush_device_pins(&c->journal, op->migrate.dev);
+ ret = bch2_move_data_phys(c, op->migrate.dev, 0, U64_MAX,
~0,
NULL,
stats,
writepoint_hashed((unsigned long) current),
true,
- migrate_pred, &op) ?: ret;
+ migrate_pred, op) ?: ret;
bch2_btree_interior_updates_flush(c);
ret = bch2_replicas_gc2(c) ?: ret;
break;
@@ -1468,11 +1453,11 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
printbuf_indent_add(out, 2);
- mutex_lock(&ctxt->lock);
- struct moving_io *io;
- list_for_each_entry(io, &ctxt->ios, io_list)
- bch2_data_update_inflight_to_text(out, &io->write);
- mutex_unlock(&ctxt->lock);
+ scoped_guard(mutex, &ctxt->lock) {
+ struct moving_io *io;
+ list_for_each_entry(io, &ctxt->ios, io_list)
+ bch2_data_update_inflight_to_text(out, &io->write);
+ }
printbuf_indent_sub(out, 4);
}
@@ -1481,10 +1466,9 @@ void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
{
struct moving_context *ctxt;
- mutex_lock(&c->moving_context_lock);
- list_for_each_entry(ctxt, &c->moving_context_list, list)
- bch2_moving_ctxt_to_text(out, c, ctxt);
- mutex_unlock(&c->moving_context_lock);
+ scoped_guard(mutex, &c->moving_context_lock)
+ list_for_each_entry(ctxt, &c->moving_context_list, list)
+ bch2_moving_ctxt_to_text(out, c, ctxt);
}
void bch2_fs_move_init(struct bch_fs *c)
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 86b80499ac55..fe92ca6d418d 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -152,7 +152,7 @@ int bch2_evacuate_bucket(struct moving_context *,
struct data_update_opts);
int bch2_data_job(struct bch_fs *,
struct bch_move_stats *,
- struct bch_ioctl_data);
+ struct bch_ioctl_data *);
void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *);
void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *);
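On the bch2_data_job() signature change: struct bch_ioctl_data was previously passed by value, so every call copied the whole struct onto the stack, and inside the function &op named the local copy rather than the caller's object (note the old scrub/migrate calls handing &op to the pred callbacks). A hypothetical illustration of the difference:

    struct big_args { u64 a[32]; };     /* hypothetical, ~256 bytes */

    static int demo_cb(void *arg);      /* stand-in for a pred callback */

    /* by value: a 256-byte stack copy per call, and &op is the copy */
    static int demo_by_value(struct big_args op)
    {
            return demo_cb(&op);
    }

    /* by pointer: no copy; callbacks and caller share one object */
    static int demo_by_pointer(struct big_args *op)
    {
            return demo_cb(op);
    }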
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 27e68d470ad0..9192b1fc3594 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -71,7 +71,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (ret)
return ret;
- struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode);
+ CLASS(bch2_dev_bucket_tryget, ca)(c, k.k->p);
if (!ca)
goto out;
@@ -90,7 +90,6 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
ret = lru_idx && lru_idx <= time;
out:
- bch2_dev_put(ca);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -320,8 +319,8 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
bch2_printbuf_make_room(out, 4096);
struct task_struct *t;
- out->atomic++;
scoped_guard(rcu) {
+ guard(printbuf_atomic)(out);
prt_printf(out, "Currently calculated wait:\n");
for_each_rw_member_rcu(c, ca) {
prt_printf(out, " %s:\t", ca->name);
@@ -333,7 +332,6 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
if (t)
get_task_struct(t);
}
- --out->atomic;
if (t) {
bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL);
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index 3e2b41babc26..8fa108880f58 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -99,7 +99,9 @@ int bch2_create_trans(struct btree_trans *trans,
* If we're not root, we have to own the subvolume being
* snapshotted:
*/
- if (uid && new_inode->bi_uid != uid) {
+ if (uid &&
+ !capable(CAP_FOWNER) &&
+ new_inode->bi_uid != uid) {
ret = -EPERM;
goto err;
}
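The added capable(CAP_FOWNER) check follows the usual VFS convention for bypassing ownership checks: CAP_FOWNER is the capability covering operations on files owned by someone else, so only unprivileged users must own the subvolume they snapshot. A sketch of the resulting permission logic (hypothetical helper):

    #include <linux/capability.h>

    static bool demo_may_snapshot(u32 subvol_owner_uid, u32 uid)
    {
            /* privileged tasks skip the ownership check entirely */
            return capable(CAP_FOWNER) || subvol_owner_uid == uid;
    }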
@@ -727,7 +729,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
bool in_fsck)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct btree_iter bp_iter = {};
int ret = 0;
@@ -835,7 +837,6 @@ out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &bp_iter);
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -847,7 +848,7 @@ int __bch2_check_dirent_target(struct btree_trans *trans,
bool in_fsck)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck);
@@ -882,7 +883,6 @@ int __bch2_check_dirent_target(struct btree_trans *trans,
}
err:
fsck_err:
- printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -940,7 +940,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
snapshot_id_list *snapshot_overwrites,
bool *do_update)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool repairing_parents = false;
int ret = 0;
@@ -967,7 +967,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
snapshot_overwrites, &buf);
if (ret)
- goto err;
+ return ret;
if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
inode->bi_flags |= BCH_INODE_has_case_insensitive;
@@ -986,14 +986,14 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
if (dir.bi_parent_subvol) {
ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot);
if (ret)
- goto err;
+ return ret;
snapshot_overwrites = NULL;
}
ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0);
if (ret)
- goto err;
+ return ret;
if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) {
prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");
@@ -1001,13 +1001,13 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
snapshot_overwrites, &buf);
if (ret)
- goto err;
+ return ret;
if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
dir.bi_flags |= BCH_INODE_has_case_insensitive;
ret = __bch2_fsck_write_inode(trans, &dir);
if (ret)
- goto err;
+ return ret;
}
}
@@ -1019,9 +1019,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
break;
}
out:
-err:
fsck_err:
- printbuf_exit(&buf);
if (ret)
return ret;
diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c
index 962218fa68ec..58cfd540c6d6 100644
--- a/fs/bcachefs/nocow_locking.c
+++ b/fs/bcachefs/nocow_locking.c
@@ -47,7 +47,7 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
int v, lock_val = flags ? 1 : -1;
unsigned i;
- spin_lock(&l->lock);
+ guard(spinlock)(&l->lock);
for (i = 0; i < ARRAY_SIZE(l->b); i++)
if (l->b[i] == dev_bucket)
@@ -58,21 +58,19 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
l->b[i] = dev_bucket;
goto take_lock;
}
-fail:
- spin_unlock(&l->lock);
+
return false;
got_entry:
v = atomic_read(&l->l[i]);
if (lock_val > 0 ? v < 0 : v > 0)
- goto fail;
+ return false;
take_lock:
v = atomic_read(&l->l[i]);
/* Overflow? */
if (v && sign(v + lock_val) != sign(v))
- goto fail;
+ return false;
atomic_add(lock_val, &l->l[i]);
- spin_unlock(&l->lock);
return true;
}
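__bch2_bucket_nocow_trylock() packs two lock modes into one signed counter: positive values count holders of one mode, negative values the other, and the guard() conversion lets every failure path return directly instead of jumping to an unlock label. A sketch of the counting scheme (assumed semantics; the real code also rejects additions that would overflow across zero, all under the bucket spinlock):

    #include <linux/atomic.h>

    /* lock_val is +1 or -1 depending on the mode being taken */
    static bool demo_trylock(atomic_t *l, int lock_val)
    {
            int v = atomic_read(l);

            if (lock_val > 0 ? v < 0 : v > 0)
                    return false;           /* held in the opposite mode */

            atomic_add(lock_val, l);        /* safe: caller holds the bucket spinlock */
            return true;
    }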
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index b1cf88905b81..921f9049912d 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -584,7 +584,7 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
break;
case Opt_discard:
if (!ca) {
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
for_each_member_device(c, ca) {
struct bch_member *m =
bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx);
@@ -592,7 +592,6 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
}
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
break;
case Opt_version_upgrade:
@@ -613,7 +612,6 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
struct printbuf *parse_later,
const char *name, const char *val)
{
- struct printbuf err = PRINTBUF;
u64 v;
int ret, id;
@@ -638,46 +636,36 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
val = bch2_opt_val_synonym_lookup(name, val);
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
- goto bad_opt;
+ return -BCH_ERR_option_name;
if (id == Opt_acl &&
!IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL))
- goto bad_opt;
+ return -BCH_ERR_option_name;
if ((id == Opt_usrquota ||
id == Opt_grpquota) &&
!IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
- goto bad_opt;
+ return -BCH_ERR_option_name;
+ CLASS(printbuf, err)();
ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
if (ret == -BCH_ERR_option_needs_open_fs) {
- ret = 0;
-
if (parse_later) {
prt_printf(parse_later, "%s=%s,", name, val);
if (parse_later->allocation_failure)
- ret = -ENOMEM;
+ return -ENOMEM;
}
- goto out;
+ return 0;
}
if (ret < 0)
- goto bad_val;
+ return -BCH_ERR_option_value;
if (opts)
bch2_opt_set_by_id(opts, id, v);
- ret = 0;
-out:
- printbuf_exit(&err);
- return ret;
-bad_opt:
- ret = -BCH_ERR_option_name;
- goto out;
-bad_val:
- ret = -BCH_ERR_option_value;
- goto out;
+ return 0;
}
int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
@@ -805,11 +793,10 @@ bool __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca,
const struct bch_option *opt, u64 v)
{
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
bool changed = __bch2_opt_set_sb(c->disk_sb.sb, ca ? ca->dev_idx : -1, opt, v);
if (changed)
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
return changed;
}
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 63f8e254495c..84ce69a7f131 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -150,12 +150,12 @@ enum fsck_err_opts {
NULL, "Number of consecutive write errors allowed before kicking out a device")\
x(metadata_replicas, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
- OPT_UINT(1, BCH_REPLICAS_MAX), \
+ OPT_UINT(1, BCH_REPLICAS_MAX + 1), \
BCH_SB_META_REPLICAS_WANT, 1, \
"#", "Number of metadata replicas") \
x(data_replicas, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
- OPT_UINT(1, BCH_REPLICAS_MAX), \
+ OPT_UINT(1, BCH_REPLICAS_MAX + 1), \
BCH_SB_DATA_REPLICAS_WANT, 1, \
"#", "Number of data replicas") \
x(metadata_replicas_required, u8, \
@@ -165,7 +165,7 @@ enum fsck_err_opts {
"#", NULL) \
x(data_replicas_required, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT, \
- OPT_UINT(1, BCH_REPLICAS_MAX), \
+ OPT_UINT(1, BCH_REPLICAS_MAX + 1), \
BCH_SB_DATA_REPLICAS_REQ, 1, \
"#", NULL) \
x(encoded_extent_max, u32, \
@@ -529,7 +529,7 @@ enum fsck_err_opts {
"size", "Specifies the bucket size; must be greater than the btree node size")\
x(durability, u8, \
OPT_DEVICE|OPT_RUNTIME|OPT_SB_FIELD_ONE_BIAS, \
- OPT_UINT(0, BCH_REPLICAS_MAX), \
+ OPT_UINT(0, BCH_REPLICAS_MAX + 1), \
BCH_MEMBER_DURABILITY, 1, \
"n", "Data written to this device will be considered\n"\
"to have already been replicated n times") \
diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
index 8f4e28d440ac..907e5c97550b 100644
--- a/fs/bcachefs/printbuf.h
+++ b/fs/bcachefs/printbuf.h
@@ -295,4 +295,8 @@ static inline void printbuf_atomic_dec(struct printbuf *buf)
buf->atomic--;
}
+DEFINE_GUARD(printbuf_atomic, struct printbuf *,
+ printbuf_atomic_inc(_T),
+ printbuf_atomic_dec(_T));
+
#endif /* _BCACHEFS_PRINTBUF_H */
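This new DEFINE_GUARD() is what backs the guard(printbuf_atomic)(out) calls seen throughout the patch: <linux/cleanup.h>'s DEFINE_GUARD(name, type, lock, unlock) generates a scope-bound object whose constructor runs the lock expression and whose destructor runs the unlock expression on the same pointer. Usage sketch:

    static void demo(struct printbuf *out)
    {
            guard(printbuf_atomic)(out);    /* printbuf_atomic_inc(out) now */

            prt_printf(out, "written while nonblocking");
    }                                       /* printbuf_atomic_dec(out) at scope end */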
diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c
index d09898566abe..42353067ba28 100644
--- a/fs/bcachefs/progress.c
+++ b/fs/bcachefs/progress.c
@@ -46,7 +46,7 @@ void bch2_progress_update_iter(struct btree_trans *trans,
s->last_node = b;
if (progress_update_p(s)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
unsigned percent = s->nodes_total
? div64_u64(s->nodes_seen * 100, s->nodes_total)
: 0;
@@ -56,6 +56,5 @@ void bch2_progress_update_iter(struct btree_trans *trans,
bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
bch_info(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
}
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index f241efb1fb50..5f1eff591b29 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -394,12 +394,10 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k,
dq = bkey_s_c_to_quota(k);
q = &c->quotas[k.k->p.inode];
- mutex_lock(&q->lock);
+ guard(mutex)(&q->lock);
mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL);
- if (!mq) {
- mutex_unlock(&q->lock);
+ if (!mq)
return -ENOMEM;
- }
for (i = 0; i < Q_COUNTERS; i++) {
mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit);
@@ -414,8 +412,6 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k,
mq->c[Q_INO].timer = qdq->d_ino_timer;
if (qdq && qdq->d_fieldmask & QC_INO_WARNS)
mq->c[Q_INO].warns = qdq->d_ino_warns;
-
- mutex_unlock(&q->lock);
}
return 0;
@@ -522,24 +518,21 @@ advance:
int bch2_fs_quota_read(struct bch_fs *c)
{
+ scoped_guard(mutex, &c->sb_lock) {
+ struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
+ if (!sb_quota)
+ return bch_err_throw(c, ENOSPC_sb_quota);
- mutex_lock(&c->sb_lock);
- struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
- if (!sb_quota) {
- mutex_unlock(&c->sb_lock);
- return bch_err_throw(c, ENOSPC_sb_quota);
+ bch2_sb_quota_read(c);
}
- bch2_sb_quota_read(c);
- mutex_unlock(&c->sb_lock);
-
- int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN,
BTREE_ITER_prefetch, k,
__bch2_quota_set(c, k, NULL)) ?:
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
- bch2_fs_quota_read_inode(trans, &iter, k)));
+ bch2_fs_quota_read_inode(trans, &iter, k));
bch_err_fn(c, ret);
return ret;
}
@@ -550,7 +543,6 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
{
struct bch_fs *c = sb->s_fs_info;
struct bch_sb_field_quota *sb_quota;
- int ret = 0;
if (sb->s_flags & SB_RDONLY)
return -EROFS;
@@ -569,11 +561,12 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota)
return -EINVAL;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
if (!sb_quota) {
- ret = bch_err_throw(c, ENOSPC_sb_quota);
- goto unlock;
+ int ret = bch_err_throw(c, ENOSPC_sb_quota);
+ bch_err_fn(c, ret);
+ return ret;
}
if (uflags & FS_QUOTA_UDQ_ENFD)
@@ -586,10 +579,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true);
bch2_write_super(c);
-unlock:
- mutex_unlock(&c->sb_lock);
-
- return bch2_err_class(ret);
+ return 0;
}
static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
@@ -599,7 +589,7 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
if (sb->s_flags & SB_RDONLY)
return -EROFS;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
if (uflags & FS_QUOTA_UDQ_ENFD)
SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false);
@@ -610,8 +600,6 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
return 0;
}
@@ -700,14 +688,12 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
{
struct bch_fs *c = sb->s_fs_info;
struct bch_sb_field_quota *sb_quota;
- int ret = 0;
if (0) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
qc_info_to_text(&buf, info);
pr_info("setting:\n%s", buf.buf);
- printbuf_exit(&buf);
}
if (sb->s_flags & SB_RDONLY)
@@ -723,11 +709,12 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
return -EINVAL;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
if (!sb_quota) {
- ret = bch_err_throw(c, ENOSPC_sb_quota);
- goto unlock;
+ int ret = bch_err_throw(c, ENOSPC_sb_quota);
+ bch_err_fn(c, ret);
+ return bch2_err_class(ret);
}
if (info->i_fieldmask & QC_SPC_TIMER)
@@ -749,10 +736,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
bch2_sb_quota_read(c);
bch2_write_super(c);
-unlock:
- mutex_unlock(&c->sb_lock);
-
- return bch2_err_class(ret);
+ return 0;
}
/* Get/set individual quotas: */
@@ -778,15 +762,13 @@ static int bch2_get_quota(struct super_block *sb, struct kqid kqid,
struct bch_fs *c = sb->s_fs_info;
struct bch_memquota_type *q = &c->quotas[kqid.type];
qid_t qid = from_kqid(&init_user_ns, kqid);
- struct bch_memquota *mq;
memset(qdq, 0, sizeof(*qdq));
- mutex_lock(&q->lock);
- mq = genradix_ptr(&q->table, qid);
+ guard(mutex)(&q->lock);
+ struct bch_memquota *mq = genradix_ptr(&q->table, qid);
if (mq)
__bch2_quota_get(qdq, mq);
- mutex_unlock(&q->lock);
return 0;
}
@@ -799,21 +781,17 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
qid_t qid = from_kqid(&init_user_ns, *kqid);
struct genradix_iter iter;
struct bch_memquota *mq;
- int ret = 0;
- mutex_lock(&q->lock);
+ guard(mutex)(&q->lock);
genradix_for_each_from(&q->table, iter, mq, qid)
if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
__bch2_quota_get(qdq, mq);
*kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
- goto found;
+ return 0;
}
- ret = -ENOENT;
-found:
- mutex_unlock(&q->lock);
- return bch2_err_class(ret);
+ return -ENOENT;
}
static int bch2_set_quota_trans(struct btree_trans *trans,
@@ -821,12 +799,10 @@ static int bch2_set_quota_trans(struct btree_trans *trans,
struct qc_dqblk *qdq)
{
struct btree_iter iter;
- struct bkey_s_c k;
- int ret;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p,
- BTREE_ITER_slots|BTREE_ITER_intent);
- ret = bkey_err(k);
+ struct bkey_s_c k =
+ bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p,
+ BTREE_ITER_slots|BTREE_ITER_intent);
+ int ret = bkey_err(k);
if (unlikely(ret))
return ret;
@@ -852,24 +828,22 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
struct qc_dqblk *qdq)
{
struct bch_fs *c = sb->s_fs_info;
- struct bkey_i_quota new_quota;
- int ret;
if (0) {
- struct printbuf buf = PRINTBUF;
-
+ CLASS(printbuf, buf)();
qc_dqblk_to_text(&buf, qdq);
pr_info("setting:\n%s", buf.buf);
- printbuf_exit(&buf);
}
if (sb->s_flags & SB_RDONLY)
return -EROFS;
+ struct bkey_i_quota new_quota;
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
- ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+ CLASS(btree_trans, trans)(c);
+ int ret = commit_do(trans, NULL, NULL, 0,
bch2_set_quota_trans(trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq);
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 73b463c94966..32fa7cf90b63 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -235,24 +235,19 @@ static const char * const bch2_rebalance_state_strs[] = {
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
{
struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_i_cookie *cookie;
- u64 v;
- int ret;
-
bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
BTREE_ITER_intent);
- k = bch2_btree_iter_peek_slot(trans, &iter);
- ret = bkey_err(k);
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter);
+ int ret = bkey_err(k);
if (ret)
goto err;
- v = k.k->type == KEY_TYPE_cookie
+ u64 v = k.k->type == KEY_TYPE_cookie
? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
: 0;
- cookie = bch2_trans_kmalloc(trans, sizeof(*cookie));
+ struct bkey_i_cookie *cookie = bch2_trans_kmalloc(trans, sizeof(*cookie));
ret = PTR_ERR_OR_ZERO(cookie);
if (ret)
goto err;
@@ -269,8 +264,8 @@ err:
int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
{
- int ret = bch2_trans_commit_do(c, NULL, NULL,
- BCH_TRANS_COMMIT_no_enospc,
+ CLASS(btree_trans, trans)(c);
+ int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_set_rebalance_needs_scan_trans(trans, inum));
bch2_rebalance_wakeup(c);
return ret;
@@ -284,19 +279,15 @@ int bch2_set_fs_needs_rebalance(struct bch_fs *c)
static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
{
struct btree_iter iter;
- struct bkey_s_c k;
- u64 v;
- int ret;
-
bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
BTREE_ITER_intent);
- k = bch2_btree_iter_peek_slot(trans, &iter);
- ret = bkey_err(k);
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter);
+ int ret = bkey_err(k);
if (ret)
goto err;
- v = k.k->type == KEY_TYPE_cookie
+ u64 v = k.k->type == KEY_TYPE_cookie
? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
: 0;
@@ -373,7 +364,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
}
if (trace_rebalance_extent_enabled()) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
prt_newline(&buf);
@@ -399,7 +390,6 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
}
trace_rebalance_extent(c, buf.buf);
- printbuf_exit(&buf);
}
return k;
@@ -713,17 +703,15 @@ void bch2_rebalance_stop(struct bch_fs *c)
int bch2_rebalance_start(struct bch_fs *c)
{
- struct task_struct *p;
- int ret;
-
if (c->rebalance.thread)
return 0;
if (c->opts.nochanges)
return 0;
- p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
- ret = PTR_ERR_OR_ZERO(p);
+ struct task_struct *p =
+ kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
+ int ret = PTR_ERR_OR_ZERO(p);
bch_err_msg(c, ret, "creating rebalance thread");
if (ret)
return ret;
@@ -779,7 +767,7 @@ static int check_rebalance_work_one(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bkey_s_c extent_k, rebalance_k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?:
bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter));
@@ -833,7 +821,7 @@ static int check_rebalance_work_one(struct btree_trans *trans,
ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
extent_k.k->p, false);
if (ret)
- goto err;
+ return ret;
}
if (fsck_err_on(should_have_rebalance && !have_rebalance,
@@ -842,22 +830,20 @@ static int check_rebalance_work_one(struct btree_trans *trans,
ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
extent_k.k->p, true);
if (ret)
- goto err;
+ return ret;
}
if (cmp <= 0)
bch2_btree_iter_advance(trans, extent_iter);
if (cmp >= 0)
bch2_btree_iter_advance(trans, rebalance_iter);
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
int bch2_check_rebalance_work(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter rebalance_iter, extent_iter;
int ret = 0;
@@ -884,6 +870,5 @@ int bch2_check_rebalance_work(struct bch_fs *c)
bch2_bkey_buf_exit(&last_flushed, c);
bch2_trans_iter_exit(trans, &extent_iter);
bch2_trans_iter_exit(trans, &rebalance_iter);
- bch2_trans_put(trans);
return ret < 0 ? ret : 0;
}
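
Throughout this series, bch2_trans_get()/bch2_trans_put() pairs become CLASS(btree_trans, trans)(c), tying the transaction's lifetime to the enclosing scope, and bch2_trans_commit_do() calls become commit_do() since a transaction is already in hand. A sketch of how such a class can be declared, assuming mainline's DEFINE_CLASS(name, type, exit, init, init_args...) semantics (the actual bcachefs definition may differ in detail):

	DEFINE_CLASS(btree_trans, struct btree_trans *,
		     bch2_trans_put(_T),	/* runs at scope exit */
		     bch2_trans_get(c),		/* runs at declaration */
		     struct bch_fs *c)

	int example(struct bch_fs *c)
	{
		CLASS(btree_trans, trans)(c);
		/* do_update() is hypothetical; commit_do() retries on restart */
		return commit_do(trans, NULL, NULL, 0, do_update(trans));
	}
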
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 0def4ecb7f88..58c159e5f10d 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -37,80 +37,79 @@ int bch2_btree_lost_data(struct bch_fs *c,
struct printbuf *msg,
enum btree_id btree)
{
- u64 b = BIT_ULL(btree);
int ret = 0;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
+ bool write_sb = false;
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- if (!(c->sb.btrees_lost_data & b)) {
+ if (!(c->sb.btrees_lost_data & BIT_ULL(btree))) {
prt_printf(msg, "flagging btree ");
bch2_btree_id_to_text(msg, btree);
prt_printf(msg, " lost data\n");
- ext->btrees_lost_data |= cpu_to_le64(b);
+ write_sb |= !__test_and_set_bit_le64(btree, &ext->btrees_lost_data);
}
/* Once we have runtime self healing for topology errors we won't need this: */
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret;
/* Btree node accounting will be off: */
- __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret;
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret;
#ifdef CONFIG_BCACHEFS_DEBUG
/*
* These are much more minor, and don't need to be corrected right away,
* but in debug mode we want the next fsck run to be clean:
*/
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0) ?: ret;
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret;
#endif
switch (btree) {
case BTREE_ID_alloc:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
-
- __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
- __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
- __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
- __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent);
- __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent);
- __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret;
+
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
goto out;
case BTREE_ID_backpointers:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0) ?: ret;
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0, &write_sb) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0, &write_sb) ?: ret;
goto out;
case BTREE_ID_need_discard:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret;
goto out;
case BTREE_ID_freespace:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret;
goto out;
case BTREE_ID_bucket_gens:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret;
goto out;
case BTREE_ID_lru:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret;
goto out;
case BTREE_ID_accounting:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret;
goto out;
case BTREE_ID_snapshots:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret;
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0, &write_sb) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret;
goto out;
default:
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
- ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret;
goto out;
}
out:
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
+ if (write_sb)
+ bch2_write_super(c);
return ret;
}
@@ -123,7 +122,7 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree)
/* for -o reconstruct_alloc: */
void bch2_reconstruct_alloc(struct bch_fs *c)
{
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
__set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
@@ -167,7 +166,6 @@ void bch2_reconstruct_alloc(struct bch_fs *c)
c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info));
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
for (unsigned i = 0; i < btree_id_nr_alive(c); i++)
if (btree_id_is_alloc(i))
@@ -339,14 +337,15 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
return cmp_int(l->journal_seq - 1, r->journal_seq - 1);
}
+DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *)
+
int bch2_journal_replay(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
- DARRAY(struct journal_key *) keys_sorted = { 0 };
+ CLASS(darray_journal_keys, keys_sorted)();
struct journal *j = &c->journal;
u64 start_seq = c->journal_replay_seq_start;
u64 end_seq = c->journal_replay_seq_start;
- struct btree_trans *trans = NULL;
bool immediate_flush = false;
int ret = 0;
@@ -354,13 +353,13 @@ int bch2_journal_replay(struct bch_fs *c)
ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
keys->nr, start_seq, end_seq);
if (ret)
- goto err;
+ return ret;
}
BUG_ON(!atomic_read(&keys->ref));
move_gap(keys, keys->nr);
- trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
/*
* Replay accounting keys first: we can't allow the write buffer to
@@ -380,7 +379,7 @@ int bch2_journal_replay(struct bch_fs *c)
BCH_WATERMARK_reclaim,
bch2_journal_replay_accounting_key(trans, k));
if (bch2_fs_fatal_err_on(ret, c, "error replaying accounting; %s", bch2_err_str(ret)))
- goto err;
+ return ret;
k->overwritten = true;
}
@@ -414,7 +413,7 @@ int bch2_journal_replay(struct bch_fs *c)
if (ret) {
ret = darray_push(&keys_sorted, k);
if (ret)
- goto err;
+ return ret;
}
}
@@ -445,22 +444,16 @@ int bch2_journal_replay(struct bch_fs *c)
: 0),
bch2_journal_replay_key(trans, k));
if (ret) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_btree_id_level_to_text(&buf, k->btree_id, k->level);
bch_err_msg(c, ret, "while replaying key at %s:", buf.buf);
- printbuf_exit(&buf);
- goto err;
+ return ret;
}
BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten);
}
- /*
- * We need to put our btree_trans before calling flush_all_pins(), since
- * that will use a btree_trans internally
- */
- bch2_trans_put(trans);
- trans = NULL;
+ bch2_trans_unlock_long(trans);
if (!c->opts.retain_recovery_info &&
c->recovery.pass_done >= BCH_RECOVERY_PASS_journal_replay)
@@ -479,12 +472,7 @@ int bch2_journal_replay(struct bch_fs *c)
if (keys->nr)
bch2_journal_log_msg(c, "journal replay finished");
-err:
- if (trans)
- bch2_trans_put(trans);
- darray_exit(&keys_sorted);
- bch_err_fn(c, ret);
- return ret;
+ return 0;
}
/* journal replay early: */
@@ -596,7 +584,7 @@ static int journal_replay_early(struct bch_fs *c,
static int read_btree_roots(struct bch_fs *c)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
@@ -632,7 +620,6 @@ static int read_btree_roots(struct bch_fs *c)
}
}
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -666,7 +653,7 @@ static bool check_version_upgrade(struct bch_fs *c)
}
if (new_version > old_version) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
if (old_version < bcachefs_metadata_required_upgrade_below)
prt_str(&buf, "Version upgrade required:\n");
@@ -699,14 +686,12 @@ static bool check_version_upgrade(struct bch_fs *c)
}
bch_notice(c, "%s", buf.buf);
- printbuf_exit(&buf);
-
ret = true;
}
if (new_version > c->sb.version_incompat_allowed &&
c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "Now allowing incompatible features up to ");
bch2_version_to_text(&buf, new_version);
@@ -715,8 +700,6 @@ static bool check_version_upgrade(struct bch_fs *c)
prt_newline(&buf);
bch_notice(c, "%s", buf.buf);
- printbuf_exit(&buf);
-
ret = true;
}
@@ -796,15 +779,14 @@ int bch2_fs_recovery(struct bch_fs *c)
u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
if (sb_passes) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "superblock requires following recovery passes to be run:\n ");
prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
bch_info(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
if (bch2_check_version_downgrade(c)) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "Version downgrade required:");
@@ -820,7 +802,6 @@ int bch2_fs_recovery(struct bch_fs *c)
}
bch_info(c, "%s", buf.buf);
- printbuf_exit(&buf);
write_sb = true;
}
@@ -937,11 +918,10 @@ use_clean:
if (ret)
goto err;
- ret = bch2_fs_resize_on_mount(c);
- if (ret) {
- up_write(&c->state_lock);
+ scoped_guard(rwsem_write, &c->state_lock)
+ ret = bch2_fs_resize_on_mount(c);
+ if (ret)
goto err;
- }
if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
bch_info(c, "filesystem is an unresized image file, mounting ro");
@@ -1119,10 +1099,9 @@ use_clean:
bch2_move_stats_init(&stats, "recovery");
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_version_to_text(&buf, c->sb.version_min);
bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf);
- printbuf_exit(&buf);
ret = bch2_fs_read_write_early(c) ?:
bch2_scan_old_btree_nodes(c, &stats);
@@ -1150,14 +1129,13 @@ final_out:
err:
fsck_err:
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret));
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
goto final_out;
}
@@ -1167,40 +1145,54 @@ int bch2_fs_initialize(struct bch_fs *c)
struct bch_inode_unpacked root_inode, lostfound_inode;
struct bkey_inode_buf packed_inode;
struct qstr lostfound = QSTR("lost+found");
- struct bch_member *m;
int ret;
bch_notice(c, "initializing new filesystem");
set_bit(BCH_FS_new_fs, &c->flags);
- mutex_lock(&c->sb_lock);
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
-
- bch2_check_version_downgrade(c);
+ scoped_guard(mutex, &c->sb_lock) {
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
- if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
- bch2_sb_upgrade(c, bcachefs_metadata_version_current, false);
- SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
- bch2_write_super(c);
- }
+ bch2_check_version_downgrade(c);
- for_each_member_device(c, ca) {
- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
- SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false);
- }
+ if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
+ bch2_sb_upgrade(c, bcachefs_metadata_version_current, false);
+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
+ bch2_write_super(c);
+ }
- bch2_sb_members_to_cpu(c);
+ for_each_member_device(c, ca) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false);
+ }
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ bch2_write_super(c);
+ }
set_bit(BCH_FS_btree_running, &c->flags);
- set_bit(BCH_FS_may_go_rw, &c->flags);
for (unsigned i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc_fake(c, i, 0);
+ for_each_member_device(c, ca) {
+ ret = bch2_dev_usage_init(ca, false);
+ if (ret) {
+ bch2_dev_put(ca);
+ goto err;
+ }
+ }
+
+ /*
+ * Write out the superblock and journal buckets, now that we can do
+ * btree updates
+ */
+ bch_verbose(c, "marking superblocks");
+ ret = bch2_trans_mark_dev_sbs(c);
+ bch_err_msg(c, ret, "marking superblocks");
+ if (ret)
+ goto err;
+
ret = bch2_fs_journal_alloc(c);
if (ret)
goto err;
@@ -1213,28 +1205,12 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
+ set_bit(BCH_FS_may_go_rw, &c->flags);
ret = bch2_fs_read_write_early(c);
if (ret)
goto err;
- set_bit(BCH_FS_accounting_replay_done, &c->flags);
- bch2_journal_set_replay_done(&c->journal);
-
- for_each_member_device(c, ca) {
- ret = bch2_dev_usage_init(ca, false);
- if (ret) {
- bch2_dev_put(ca);
- goto err;
- }
- }
-
- /*
- * Write out the superblock and journal buckets, now that we can do
- * btree updates
- */
- bch_verbose(c, "marking superblocks");
- ret = bch2_trans_mark_dev_sbs(c);
- bch_err_msg(c, ret, "marking superblocks");
+ ret = bch2_journal_replay(c);
if (ret)
goto err;
@@ -1292,12 +1268,11 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- mutex_lock(&c->sb_lock);
- SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
- SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock) {
+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ bch2_write_super(c);
+ }
c->recovery.curr_pass = BCH_RECOVERY_PASS_NR;
return 0;
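
A note on the pattern introduced above: bch2_btree_lost_data() now accumulates a write_sb flag instead of unconditionally calling bch2_write_super(). __test_and_set_bit_le64() evidently returns the old bit value (its use under `!` implies the same contract as test_and_set_bit()), so the superblock write is skipped when every bit was already set. Reduced to its core:

	bool write_sb = false;

	/* old value 0 means the bit is newly set, i.e. the sb changed */
	write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_accounting_mismatch,
					     ext->errors_silent);

	if (write_sb)
		bch2_write_super(c);	/* touch disk only if modified */
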
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index c09ed2dd4639..b2cdd111fd0e 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -237,19 +237,21 @@ static int bch2_lookup_root_inode(struct bch_fs *c)
subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM;
struct bch_inode_unpacked inode_u;
struct bch_subvolume subvol;
+ CLASS(btree_trans, trans)(c);
- return bch2_trans_do(c,
+ return lockrestart_do(trans,
bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
}
struct recovery_pass_fn {
int (*fn)(struct bch_fs *);
+ const char *name;
unsigned when;
};
static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
+#define x(_fn, _id, _when) { .fn = bch2_##_fn, .name = #_fn, .when = _when },
BCH_RECOVERY_PASSES()
#undef x
};
@@ -338,7 +340,8 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
struct printbuf *out,
enum bch_recovery_pass pass,
- enum bch_run_recovery_pass_flags flags)
+ enum bch_run_recovery_pass_flags flags,
+ bool *write_sb)
{
struct bch_fs_recovery *r = &c->recovery;
int ret = 0;
@@ -346,13 +349,11 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
lockdep_assert_held(&c->sb_lock);
bch2_printbuf_make_room(out, 1024);
- out->atomic++;
-
- unsigned long lockflags;
- spin_lock_irqsave(&r->lock, lockflags);
+ guard(printbuf_atomic)(out);
+ guard(spinlock_irq)(&r->lock);
if (!recovery_pass_needs_set(c, pass, &flags))
- goto out;
+ return 0;
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
bool rewind = in_recovery &&
@@ -360,17 +361,17 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
!(r->passes_complete & BIT_ULL(pass));
bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit;
- if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) {
+ if (!(flags & RUN_RECOVERY_PASS_nopersistent)) {
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
+ *write_sb |= !__test_and_set_bit_le64(bch2_recovery_pass_to_stable(pass),
+ ext->recovery_passes_required);
}
if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
(!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) {
prt_printf(out, "need recovery pass %s (%u), but already rw\n",
bch2_recovery_passes[pass], pass);
- ret = bch_err_throw(c, cannot_rewind_recovery);
- goto out;
+ return bch_err_throw(c, cannot_rewind_recovery);
}
if (ratelimit)
@@ -400,9 +401,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
if (p->when & PASS_ONLINE)
bch2_run_async_recovery_passes(c);
}
-out:
- spin_unlock_irqrestore(&r->lock, lockflags);
- --out->atomic;
+
return ret;
}
@@ -411,14 +410,19 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
enum bch_recovery_pass pass,
enum bch_run_recovery_pass_flags flags)
{
- int ret = 0;
+ /*
+ * With RUN_RECOVERY_PASS_ratelimit, recovery_pass_needs_set needs
+ * sb_lock
+ */
+ if (!(flags & RUN_RECOVERY_PASS_ratelimit) &&
+ !recovery_pass_needs_set(c, pass, &flags))
+ return 0;
- if (recovery_pass_needs_set(c, pass, &flags)) {
- guard(mutex)(&c->sb_lock);
- ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
+ guard(mutex)(&c->sb_lock);
+ bool write_sb = false;
+ int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb);
+ if (write_sb)
bch2_write_super(c);
- }
-
return ret;
}
@@ -441,14 +445,13 @@ int bch2_require_recovery_pass(struct bch_fs *c,
return 0;
enum bch_run_recovery_pass_flags flags = 0;
- int ret = 0;
- if (recovery_pass_needs_set(c, pass, &flags)) {
- ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
+ bool write_sb = false;
+ int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb) ?:
+ bch_err_throw(c, recovery_pass_will_run);
+ if (write_sb)
bch2_write_super(c);
- }
-
- return ret ?: bch_err_throw(c, recovery_pass_will_run);
+ return ret;
}
int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
@@ -458,16 +461,16 @@ int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pa
if (!recovery_pass_needs_set(c, pass, &flags))
return 0;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
+ bool write_sb = false;
int ret = __bch2_run_explicit_recovery_pass(c, &buf, pass,
- RUN_RECOVERY_PASS_nopersistent);
- mutex_unlock(&c->sb_lock);
+ RUN_RECOVERY_PASS_nopersistent,
+ &write_sb);
bch2_print_str(c, KERN_NOTICE, buf.buf);
- printbuf_exit(&buf);
return ret;
}
@@ -486,6 +489,7 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
r->passes_to_run &= ~BIT_ULL(pass);
if (ret) {
+ bch_err(c, "%s(): error %s", p->name, bch2_err_str(ret));
r->passes_failing |= BIT_ULL(pass);
return ret;
}
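
Also above: recovery_pass_fns entries now carry a .name captured with the preprocessor's stringize operator, which is what lets bch2_run_recovery_pass() report the failing pass by name. A self-contained sketch of the idiom (the pass names here are hypothetical):

	#include <stdio.h>

	#define MY_PASSES()	\
		x(check_foo)	\
		x(check_bar)

	struct pass { int (*fn)(void); const char *name; };

	static int check_foo(void) { return 0; }
	static int check_bar(void) { return -1; }

	static struct pass passes[] = {
	#define x(_fn) { .fn = _fn, .name = #_fn },	/* #_fn yields "check_foo" */
		MY_PASSES()
	#undef x
	};

	int main(void)
	{
		for (unsigned i = 0; i < 2; i++)
			if (passes[i].fn())
				fprintf(stderr, "%s(): failed\n", passes[i].name);
		return 0;
	}
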
diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h
index 2117f0ce1922..4f2c2f811d5e 100644
--- a/fs/bcachefs/recovery_passes.h
+++ b/fs/bcachefs/recovery_passes.h
@@ -30,7 +30,8 @@ int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pas
int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,
enum bch_recovery_pass,
- enum bch_run_recovery_pass_flags);
+ enum bch_run_recovery_pass_flags,
+ bool *);
int bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,
enum bch_recovery_pass,
enum bch_run_recovery_pass_flags);
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 8d8e045b6bd5..60abd89d7c9f 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -183,7 +183,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
u64 live_end = REFLINK_P_IDX(p.v) + p.k->size;
u64 refd_start = live_start - le32_to_cpu(p.v->front_pad);
u64 refd_end = live_end + le32_to_cpu(p.v->back_pad);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
BUG_ON(missing_start < refd_start);
@@ -195,7 +195,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
prt_printf(&buf, "pointer to missing indirect extent in ");
ret = bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos);
if (ret)
- goto err;
+ return ret;
prt_printf(&buf, "-%llu\n", (missing_pos.offset + (missing_end - missing_start)) << 9);
bch2_bkey_val_to_text(&buf, c, p.s_c);
@@ -207,7 +207,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
- goto err;
+ return ret;
/*
* Is the missing range not actually needed?
@@ -238,15 +238,13 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun);
if (ret)
- goto err;
+ return ret;
if (should_commit)
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
bch_err_throw(c, transaction_restart_nested);
}
-err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -301,7 +299,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v);
struct btree_iter iter;
@@ -360,7 +358,6 @@ next:
err:
fsck_err:
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
return ret;
}
@@ -374,7 +371,7 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans,
int add = !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1;
u64 next_idx = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad);
s64 ret = 0;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
if (r_idx >= c->reflink_gc_nr)
goto not_found;
@@ -394,12 +391,10 @@ not_found:
if (flags & BTREE_TRIGGER_check_repair) {
ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false);
if (ret)
- goto err;
+ return ret;
}
*idx = next_idx;
-err:
- printbuf_exit(&buf);
return ret;
}
@@ -498,20 +493,15 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
bool reflink_p_may_update_opts_field)
{
struct bch_fs *c = trans->c;
- struct btree_iter reflink_iter = {};
- struct bkey_s_c k;
- struct bkey_i *r_v;
- struct bkey_i_reflink_p *r_p;
- __le64 *refcount;
- int ret;
if (orig->k.type == KEY_TYPE_inline_data)
bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data);
+ struct btree_iter reflink_iter;
bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX,
BTREE_ITER_intent);
- k = bch2_btree_iter_peek_prev(trans, &reflink_iter);
- ret = bkey_err(k);
+ struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, &reflink_iter);
+ int ret = bkey_err(k);
if (ret)
goto err;
@@ -523,7 +513,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size)))
return -ENOSPC;
- r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
+ struct bkey_i *r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
ret = PTR_ERR_OR_ZERO(r_v);
if (ret)
goto err;
@@ -536,7 +526,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
- refcount = bkey_refcount(bkey_i_to_s(r_v));
+ __le64 *refcount = bkey_refcount(bkey_i_to_s(r_v));
*refcount = 0;
memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
@@ -549,7 +539,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
* so we know it will be big enough:
*/
orig->k.type = KEY_TYPE_reflink_p;
- r_p = bkey_i_to_reflink_p(orig);
+
+ struct bkey_i_reflink_p *r_p = bkey_i_to_reflink_p(orig);
set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
/* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */
@@ -598,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c,
u64 new_i_size, s64 *i_sectors_delta,
bool may_change_src_io_path_opts)
{
- struct btree_trans *trans;
struct btree_iter dst_iter, src_iter;
struct bkey_s_c src_k;
struct bkey_buf new_dst, new_src;
@@ -623,7 +613,7 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_init(&new_dst);
bch2_bkey_buf_init(&new_src);
- trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
ret = bch2_inum_opts_get(trans, src_inum, &opts);
if (ret)
@@ -761,7 +751,6 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_trans_iter_exit(trans, &inode_iter);
} while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
err:
- bch2_trans_put(trans);
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
@@ -779,7 +768,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
const __le64 *refcount = bkey_refcount_c(k);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct reflink_gc *r;
int ret = 0;
@@ -807,7 +796,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
- goto out;
+ return ret;
if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
@@ -815,32 +804,30 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
ret = bch2_trans_update(trans, iter, new, 0);
}
-out:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
int bch2_gc_reflink_done(struct bch_fs *c)
{
+ CLASS(btree_trans, trans)(c);
size_t idx = 0;
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ int ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_reflink, POS_MIN,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
+ bch2_gc_write_reflink_key(trans, &iter, k, &idx));
c->reflink_gc_nr = 0;
return ret;
}
int bch2_gc_reflink_start(struct bch_fs *c)
{
+ CLASS(btree_trans, trans)(c);
c->reflink_gc_nr = 0;
- int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_prefetch, k, ({
const __le64 *refcount = bkey_refcount_c(k);
@@ -858,7 +845,7 @@ int bch2_gc_reflink_start(struct bch_fs *c)
r->size = k.k->size;
r->refcount = 0;
0;
- })));
+ }));
bch_err_fn(c, ret);
return ret;
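
Worth noting from the bch2_make_extent_indirect() hunk: the new indirect key is allocated as sizeof(__le64) + bkey_bytes(&orig->k), and the payload is copied to refcount + 1, because an indirect extent's value is the original extent value prefixed by its refcount. As a layout illustration only (the code builds this with set_bkey_val_bytes()/memcpy(), not with a struct):

	struct indirect_extent_value {	/* hypothetical, for illustration */
		__le64	refcount;	/* what bkey_refcount() returns; starts at 0 */
		u8	orig[];		/* byte-for-byte copy of the original value */
	};
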
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 8383bd7fdb3f..0784283ce78c 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -286,11 +286,8 @@ bool bch2_replicas_marked_locked(struct bch_fs *c,
bool bch2_replicas_marked(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
- percpu_down_read(&c->mark_lock);
- bool ret = bch2_replicas_marked_locked(c, search);
- percpu_up_read(&c->mark_lock);
-
- return ret;
+ guard(percpu_read)(&c->mark_lock);
+ return bch2_replicas_marked_locked(c, search);
}
noinline
@@ -305,14 +302,14 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
memset(&new_r, 0, sizeof(new_r));
memset(&new_gc, 0, sizeof(new_gc));
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
if (c->replicas_gc.entries &&
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
if (!new_gc.entries) {
ret = bch_err_throw(c, ENOMEM_cpu_replicas);
- goto err;
+ goto out;
}
}
@@ -320,12 +317,12 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries) {
ret = bch_err_throw(c, ENOMEM_cpu_replicas);
- goto err;
+ goto out;
}
ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
if (ret)
- goto err;
+ goto out;
}
if (!new_r.entries &&
@@ -338,22 +335,18 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
bch2_write_super(c);
/* don't update in memory replicas until changes are persistent */
- percpu_down_write(&c->mark_lock);
- if (new_r.entries)
- swap(c->replicas, new_r);
- if (new_gc.entries)
- swap(new_gc, c->replicas_gc);
- percpu_up_write(&c->mark_lock);
+ scoped_guard(percpu_write, &c->mark_lock) {
+ if (new_r.entries)
+ swap(c->replicas, new_r);
+ if (new_gc.entries)
+ swap(new_gc, c->replicas_gc);
+ }
out:
- mutex_unlock(&c->sb_lock);
-
kfree(new_r.entries);
kfree(new_gc.entries);
- return ret;
-err:
bch_err_msg(c, ret, "adding replicas entry");
- goto out;
+ return ret;
}
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
@@ -371,24 +364,20 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
lockdep_assert_held(&c->replicas_gc_lock);
- mutex_lock(&c->sb_lock);
- percpu_down_write(&c->mark_lock);
-
- ret = ret ?:
- bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
- if (!ret)
- swap(c->replicas, c->replicas_gc);
-
- kfree(c->replicas_gc.entries);
- c->replicas_gc.entries = NULL;
+ guard(mutex)(&c->sb_lock);
+ scoped_guard(percpu_write, &c->mark_lock) {
+ ret = ret ?:
+ bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
+ if (!ret)
+ swap(c->replicas, c->replicas_gc);
- percpu_up_write(&c->mark_lock);
+ kfree(c->replicas_gc.entries);
+ c->replicas_gc.entries = NULL;
+ }
if (!ret)
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
return ret;
}
@@ -399,7 +388,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
lockdep_assert_held(&c->replicas_gc_lock);
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
BUG_ON(c->replicas_gc.entries);
c->replicas_gc.nr = 0;
@@ -420,7 +409,6 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
c->replicas_gc.entry_size,
GFP_KERNEL);
if (!c->replicas_gc.entries) {
- mutex_unlock(&c->sb_lock);
bch_err(c, "error allocating c->replicas_gc");
return bch_err_throw(c, ENOMEM_replicas_gc);
}
@@ -432,8 +420,6 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
e, c->replicas_gc.entry_size);
bch2_cpu_replicas_sort(&c->replicas_gc);
- mutex_unlock(&c->sb_lock);
-
return 0;
}
@@ -461,55 +447,48 @@ retry:
return bch_err_throw(c, ENOMEM_replicas_gc);
}
- mutex_lock(&c->sb_lock);
- percpu_down_write(&c->mark_lock);
-
- if (nr != c->replicas.nr ||
- new.entry_size != c->replicas.entry_size) {
- percpu_up_write(&c->mark_lock);
- mutex_unlock(&c->sb_lock);
- kfree(new.entries);
- goto retry;
- }
-
- for (unsigned i = 0; i < c->replicas.nr; i++) {
- struct bch_replicas_entry_v1 *e =
- cpu_replicas_entry(&c->replicas, i);
+ guard(mutex)(&c->sb_lock);
+ scoped_guard(percpu_write, &c->mark_lock) {
+ if (nr != c->replicas.nr ||
+ new.entry_size != c->replicas.entry_size) {
+ kfree(new.entries);
+ goto retry;
+ }
- struct disk_accounting_pos k = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ for (unsigned i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(&c->replicas, i);
- unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
- "embedded variable length struct");
+ struct disk_accounting_pos k = {
+ .type = BCH_DISK_ACCOUNTING_replicas,
+ };
- struct bpos p = disk_accounting_pos_to_bpos(&k);
+ unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
+ "embedded variable length struct");
- struct bch_accounting_mem *acc = &c->accounting;
- bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
- accounting_pos_cmp, &p) >= acc->k.nr;
+ struct bpos p = disk_accounting_pos_to_bpos(&k);
- if (e->data_type == BCH_DATA_journal || !kill)
- memcpy(cpu_replicas_entry(&new, new.nr++),
- e, new.entry_size);
- }
+ struct bch_accounting_mem *acc = &c->accounting;
+ bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
+ accounting_pos_cmp, &p) >= acc->k.nr;
- bch2_cpu_replicas_sort(&new);
+ if (e->data_type == BCH_DATA_journal || !kill)
+ memcpy(cpu_replicas_entry(&new, new.nr++),
+ e, new.entry_size);
+ }
- ret = bch2_cpu_replicas_to_sb_replicas(c, &new);
+ bch2_cpu_replicas_sort(&new);
- if (!ret)
- swap(c->replicas, new);
+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new);
- kfree(new.entries);
+ if (!ret)
+ swap(c->replicas, new);
- percpu_up_write(&c->mark_lock);
+ kfree(new.entries);
+ }
if (!ret)
bch2_write_super(c);
-
- mutex_unlock(&c->sb_lock);
-
return ret;
}
@@ -597,9 +576,8 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
bch2_cpu_replicas_sort(&new_r);
- percpu_down_write(&c->mark_lock);
+ guard(percpu_write)(&c->mark_lock);
swap(c->replicas, new_r);
- percpu_up_write(&c->mark_lock);
kfree(new_r.entries);
@@ -809,9 +787,8 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, bool print)
{
struct bch_replicas_entry_v1 *e;
- bool ret = true;
- percpu_down_read(&c->mark_lock);
+ guard(percpu_read)(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, e) {
unsigned nr_online = 0, nr_failed = 0, dflags = 0;
bool metadata = e->data_type < BCH_DATA_user;
@@ -847,21 +824,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
if (dflags & ~flags) {
if (print) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_replicas_entry_to_text(&buf, e);
bch_err(c, "insufficient devices online (%u) for replicas entry %s",
nr_online, buf.buf);
- printbuf_exit(&buf);
}
- ret = false;
- break;
+ return false;
}
}
- percpu_up_read(&c->mark_lock);
- return ret;
+ return true;
}
unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
@@ -904,11 +878,8 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
- mutex_lock(&c->sb_lock);
- unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
- mutex_unlock(&c->sb_lock);
-
- return ret;
+ guard(mutex)(&c->sb_lock);
+ return bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
}
void bch2_fs_replicas_exit(struct bch_fs *c)
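
The replicas.c conversions show both sides of a percpu_rw_semaphore under guards: guard(percpu_read) for short read sections that can now return a result directly, and scoped_guard(percpu_write) to confine the write side to exactly the statements that publish the new tables. In miniature, reusing names from the hunks above (the percpu_read/percpu_write guard classes themselves are assumed to be defined elsewhere in the tree, as their use here implies):

	static bool marked(struct bch_fs *c, struct bch_replicas_entry_v1 *e)
	{
		guard(percpu_read)(&c->mark_lock);	/* up_read at any return */
		return bch2_replicas_marked_locked(c, e);
	}

	static void publish(struct bch_fs *c, struct bch_replicas_cpu *new_r)
	{
		scoped_guard(percpu_write, &c->mark_lock)
			swap(c->replicas, *new_r);	/* write side held only here */
	}
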
diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
index 59c8770e4a0e..a5916984565e 100644
--- a/fs/bcachefs/sb-clean.c
+++ b/fs/bcachefs/sb-clean.c
@@ -89,8 +89,8 @@ int bch2_verify_superblock_clean(struct bch_fs *c,
{
unsigned i;
struct bch_sb_field_clean *clean = *cleanp;
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
+ CLASS(printbuf, buf1)();
+ CLASS(printbuf, buf2)();
int ret = 0;
if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
@@ -140,8 +140,6 @@ int bch2_verify_superblock_clean(struct bch_fs *c,
l2, buf2.buf);
}
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
return ret;
}
@@ -150,7 +148,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
struct bch_sb_field_clean *clean, *sb_clean;
int ret;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean);
if (fsck_err_on(!sb_clean, c,
@@ -158,29 +156,22 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
"superblock marked clean but clean section not present")) {
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->sb.clean = false;
- mutex_unlock(&c->sb_lock);
return ERR_PTR(-BCH_ERR_invalid_sb_clean);
}
clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
GFP_KERNEL);
- if (!clean) {
- mutex_unlock(&c->sb_lock);
+ if (!clean)
return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean);
- }
ret = bch2_sb_clean_validate_late(c, clean, READ);
if (ret) {
kfree(clean);
- mutex_unlock(&c->sb_lock);
return ERR_PTR(ret);
}
- mutex_unlock(&c->sb_lock);
-
return clean;
fsck_err:
- mutex_unlock(&c->sb_lock);
return ERR_PTR(ret);
}
@@ -265,21 +256,16 @@ const struct bch_sb_field_ops bch_sb_field_ops_clean = {
int bch2_fs_mark_dirty(struct bch_fs *c)
{
- int ret;
-
/*
* Unconditionally write superblock, to verify it hasn't changed before
* we go rw:
*/
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
- ret = bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
- return ret;
+ return bch2_write_super(c);
}
void bch2_fs_mark_clean(struct bch_fs *c)
@@ -289,9 +275,9 @@ void bch2_fs_mark_clean(struct bch_fs *c)
unsigned u64s;
int ret;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb))
- goto out;
+ return;
SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
@@ -305,7 +291,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s);
if (!sb_clean) {
bch_err(c, "error resizing superblock while setting filesystem clean");
- goto out;
+ return;
}
sb_clean->flags = 0;
@@ -329,12 +315,10 @@ void bch2_fs_mark_clean(struct bch_fs *c)
ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE);
if (ret) {
bch_err(c, "error writing marking filesystem clean: validate error");
- goto out;
+ return;
}
bch2_journal_pos_from_member_info_set(c);
bch2_write_super(c);
-out:
- mutex_unlock(&c->sb_lock);
}
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
index b868702a431a..2e3a56bfd085 100644
--- a/fs/bcachefs/sb-counters_format.h
+++ b/fs/bcachefs/sb-counters_format.h
@@ -12,6 +12,12 @@ enum counters_flags {
x(io_read_inline, 80, TYPE_SECTORS) \
x(io_read_hole, 81, TYPE_SECTORS) \
x(io_read_promote, 30, TYPE_COUNTER) \
+ x(io_read_nopromote, 85, TYPE_COUNTER) \
+ x(io_read_nopromote_may_not, 86, TYPE_COUNTER) \
+ x(io_read_nopromote_already_promoted, 87, TYPE_COUNTER) \
+ x(io_read_nopromote_unwritten, 88, TYPE_COUNTER) \
+ x(io_read_nopromote_congested, 89, TYPE_COUNTER) \
+ x(io_read_nopromote_in_flight, 90, TYPE_COUNTER) \
x(io_read_bounce, 31, TYPE_COUNTER) \
x(io_read_split, 33, TYPE_COUNTER) \
x(io_read_reuse_race, 34, TYPE_COUNTER) \
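
The counter list is an x-macro in which each entry carries an explicit numeric ID that forms part of the on-disk superblock format; that is why the six new io_read_nopromote counters take fresh IDs 85-90 rather than renumbering their neighbours (io_read_promote keeps 30). A plausible expansion, assuming the conventional x-macro pattern rather than quoting the header's exact definition:

	enum bch_persistent_counters {
	#define x(_name, _id, _type)	BCH_COUNTER_##_name,
		BCH_PERSISTENT_COUNTERS()
	#undef x
		BCH_COUNTER_NR
	};
	/* the _id column gives each counter a stable on-disk identity
	 * independent of enum order, so entries can be appended freely */
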
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 1506d05e0665..de56a1ee79db 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -191,7 +191,7 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c)
bool write_sb = false;
int ret = 0;
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (old_version < bcachefs_metadata_version_bucket_stripe_sectors &&
@@ -205,7 +205,6 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c)
if (write_sb)
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
return ret < 0 ? ret : 0;
}
@@ -372,7 +371,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
if (!test_bit(BCH_FS_btree_running, &c->flags))
return 0;
- darray_char table = {};
+ CLASS(darray_char, table)();
int ret = 0;
for (const struct upgrade_downgrade_entry *src = downgrade_table;
@@ -389,7 +388,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
ret = darray_make_room(&table, bytes);
if (ret)
- goto out;
+ return ret;
dst = (void *) &darray_top(table);
dst->version = cpu_to_le16(src->version);
@@ -401,7 +400,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
ret = downgrade_table_extra(c, &table);
if (ret)
- goto out;
+ return ret;
if (!dst->recovery_passes[0] &&
!dst->recovery_passes[1] &&
@@ -416,18 +415,14 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));
if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
- goto out;
+ return 0;
d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
- if (!d) {
- ret = bch_err_throw(c, ENOSPC_sb_downgrade);
- goto out;
- }
+ if (!d)
+ return bch_err_throw(c, ENOSPC_sb_downgrade);
memcpy(d->entries, table.data, table.nr);
memset_u64s_tail(d->entries, 0, table.nr);
-out:
- darray_exit(&table);
return ret;
}
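
bch2_sb_downgrade_update() above makes the same move for heap memory: CLASS(darray_char, table)() ties darray_exit() to scope exit, collapsing the goto out paths into plain returns. The resulting shape (a sketch, assuming the class wraps darray_exit() as the hunk implies):

	int build(struct bch_fs *c)
	{
		CLASS(darray_char, table)();	/* freed at scope exit */

		int ret = darray_make_room(&table, 64);
		if (ret)
			return ret;		/* no goto out / darray_exit() */
		/* ... append entries, copy into the superblock ... */
		return 0;
	}
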
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index 48853efdc105..41a259eab4fb 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -110,75 +110,66 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
};
unsigned i;
- mutex_lock(&c->fsck_error_counts_lock);
+ guard(mutex)(&c->fsck_error_counts_lock);
+
for (i = 0; i < e->nr; i++) {
if (err == e->data[i].id) {
e->data[i].nr++;
e->data[i].last_error_time = n.last_error_time;
- goto out;
+ return;
}
if (err < e->data[i].id)
break;
}
if (darray_make_room(e, 1))
- goto out;
+ return;
darray_insert_item(e, i, n);
-out:
- mutex_unlock(&c->fsck_error_counts_lock);
}
void bch2_sb_errors_from_cpu(struct bch_fs *c)
{
- bch_sb_errors_cpu *src = &c->fsck_error_counts;
- struct bch_sb_field_errors *dst;
- unsigned i;
-
- mutex_lock(&c->fsck_error_counts_lock);
-
- dst = bch2_sb_field_resize(&c->disk_sb, errors,
- bch2_sb_field_errors_u64s(src->nr));
+ guard(mutex)(&c->fsck_error_counts_lock);
+ bch_sb_errors_cpu *src = &c->fsck_error_counts;
+ struct bch_sb_field_errors *dst =
+ bch2_sb_field_resize(&c->disk_sb, errors,
+ bch2_sb_field_errors_u64s(src->nr));
if (!dst)
- goto err;
+ return;
- for (i = 0; i < src->nr; i++) {
+ for (unsigned i = 0; i < src->nr; i++) {
SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id);
SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr);
dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time);
}
-
-err:
- mutex_unlock(&c->fsck_error_counts_lock);
}
static int bch2_sb_errors_to_cpu(struct bch_fs *c)
{
+ guard(mutex)(&c->fsck_error_counts_lock);
+
struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors);
bch_sb_errors_cpu *dst = &c->fsck_error_counts;
- unsigned i, nr = bch2_sb_field_errors_nr_entries(src);
- int ret;
+ unsigned nr = bch2_sb_field_errors_nr_entries(src);
if (!nr)
return 0;
- mutex_lock(&c->fsck_error_counts_lock);
- ret = darray_make_room(dst, nr);
+ int ret = darray_make_room(dst, nr);
if (ret)
- goto err;
+ return ret;
dst->nr = nr;
- for (i = 0; i < nr; i++) {
+ for (unsigned i = 0; i < nr; i++) {
dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]);
dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]);
dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time);
}
-err:
- mutex_unlock(&c->fsck_error_counts_lock);
- return ret;
+ return 0;
}
void bch2_fs_sb_errors_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index f2abe92ca130..0573c7b00151 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -12,7 +12,7 @@
int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
bool removed = test_bit(dev, c->devs_removed.d);
@@ -20,6 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
prt_printf(&buf, "pointer to %s device %u in key\n",
removed ? "removed" : "nonexistent", dev);
bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
bool print = removed
? bch2_count_fsck_err(c, ptr_to_removed_device, &buf)
@@ -30,7 +31,6 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
return ret;
}
@@ -441,9 +441,8 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
struct bch_fs *c = ca->fs;
struct bch_member m;
- mutex_lock(&ca->fs->sb_lock);
- m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
- mutex_unlock(&ca->fs->sb_lock);
+ scoped_guard(mutex, &ca->fs->sb_lock)
+ m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
printbuf_tabstop_push(out, 12);
@@ -470,16 +469,15 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
void bch2_dev_errors_reset(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- struct bch_member *m;
- mutex_lock(&c->sb_lock);
- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ guard(mutex)(&c->sb_lock);
+
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
/*
@@ -611,7 +609,7 @@ have_slot:
void bch2_sb_members_clean_deleted(struct bch_fs *c)
{
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
bool write_sb = false;
for (unsigned i = 0; i < c->sb.nr_devices; i++) {
@@ -625,5 +623,4 @@ void bch2_sb_members_clean_deleted(struct bch_fs *c)
if (write_sb)
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 0d363a1cdd47..35d4ab9b6197 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -133,7 +133,7 @@ static inline void __bch2_dev_put(struct bch_dev *ca)
static inline void bch2_dev_put(struct bch_dev *ca)
{
- if (ca)
+ if (!IS_ERR_OR_NULL(ca))
__bch2_dev_put(ca);
}
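
The one-liner above widens bch2_dev_put()'s check from NULL to IS_ERR_OR_NULL(), so teardown paths can put a device pointer unconditionally even when the lookup that produced it failed with an ERR_PTR value. Hypothetical caller shape (lookup_dev() is illustrative, not a real helper):

	struct bch_dev *ca = lookup_dev(c, idx);	/* NULL, ERR_PTR or valid */
	int ret = PTR_ERR_OR_ZERO(ca);
	if (!ret) {
		/* ... use ca ... */
	}
	bch2_dev_put(ca);	/* now safe for all three cases */
	return ret;
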
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 538c324f4765..08083d6ca8bc 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -152,16 +152,16 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
* here.
*/
if (type == SIX_LOCK_read && lock->readers) {
- preempt_disable();
- this_cpu_inc(*lock->readers); /* signal that we own lock */
+ scoped_guard(preempt) {
+ this_cpu_inc(*lock->readers); /* signal that we own lock */
- smp_mb();
+ smp_mb();
- old = atomic_read(&lock->state);
- ret = !(old & l[type].lock_fail);
+ old = atomic_read(&lock->state);
+ ret = !(old & l[type].lock_fail);
- this_cpu_sub(*lock->readers, !ret);
- preempt_enable();
+ this_cpu_sub(*lock->readers, !ret);
+ }
if (!ret) {
smp_mb();
@@ -360,7 +360,7 @@ static inline bool six_optimistic_spin(struct six_lock *lock,
if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN)
return false;
- preempt_disable();
+ guard(preempt)();
end_time = sched_clock() + 10 * NSEC_PER_USEC;
while (!need_resched() && six_owner_running(lock)) {
@@ -369,10 +369,8 @@ static inline bool six_optimistic_spin(struct six_lock *lock,
* wait->lock_acquired: pairs with the smp_store_release in
* __six_lock_wakeup
*/
- if (smp_load_acquire(&wait->lock_acquired)) {
- preempt_enable();
+ if (smp_load_acquire(&wait->lock_acquired))
return true;
- }
if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
six_set_bitmask(lock, SIX_LOCK_NOSPIN);
@@ -388,7 +386,6 @@ static inline bool six_optimistic_spin(struct six_lock *lock,
cpu_relax();
}
- preempt_enable();
return false;
}
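
In six.c the preempt_disable()/preempt_enable() pair becomes a scope guard, so six_optimistic_spin() can return true from inside the spin loop with no manual re-enable on the way out (guard(preempt) comes from the mainline guard infrastructure). In outline, with a hypothetical lock_became_free() standing in for the real wait->lock_acquired check:

	static bool spin_briefly(struct six_lock *lock)
	{
		guard(preempt)();			/* preempt_disable() */
		while (!need_resched()) {
			if (lock_became_free(lock))	/* hypothetical */
				return true;		/* preempt_enable() runs */
			cpu_relax();
		}
		return false;				/* ...and here too */
	}
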
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 4c43d2a2c1f5..7a801513b134 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -284,12 +284,10 @@ fsck_err:
static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
{
- mutex_lock(&c->snapshot_table_lock);
- int ret = snapshot_t_mut(c, id)
+ guard(mutex)(&c->snapshot_table_lock);
+ return snapshot_t_mut(c, id)
? 0
: bch_err_throw(c, ENOMEM_mark_snapshot);
- mutex_unlock(&c->snapshot_table_lock);
- return ret;
}
static int __bch2_mark_snapshot(struct btree_trans *trans,
@@ -300,15 +298,12 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct snapshot_t *t;
u32 id = new.k->p.offset;
- int ret = 0;
- mutex_lock(&c->snapshot_table_lock);
+ guard(mutex)(&c->snapshot_table_lock);
t = snapshot_t_mut(c, id);
- if (!t) {
- ret = bch_err_throw(c, ENOMEM_mark_snapshot);
- goto err;
- }
+ if (!t)
+ return bch_err_throw(c, ENOMEM_mark_snapshot);
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
@@ -348,9 +343,8 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
} else {
memset(t, 0, sizeof(*t));
}
-err:
- mutex_unlock(&c->snapshot_table_lock);
- return ret;
+
+ return 0;
}
int bch2_mark_snapshot(struct btree_trans *trans,
@@ -481,7 +475,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
struct bkey_s_c_snapshot_tree st;
struct bch_snapshot s;
struct bch_subvolume subvol;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct btree_iter snapshot_iter = {};
u32 root_id;
int ret;
@@ -567,7 +561,6 @@ out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &snapshot_iter);
- printbuf_exit(&buf);
return ret;
}
@@ -580,14 +573,12 @@ fsck_err:
*/
int bch2_check_snapshot_trees(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter,
BTREE_ID_snapshot_trees, POS_MIN,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_snapshot_tree(trans, &iter, k)));
- bch_err_fn(c, ret);
- return ret;
+ check_snapshot_tree(trans, &iter, k));
}
/*
@@ -706,7 +697,7 @@ static int check_snapshot(struct btree_trans *trans,
struct bkey_i_snapshot *u;
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
u32 real_depth;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
u32 i, id;
int ret = 0;
@@ -839,7 +830,6 @@ static int check_snapshot(struct btree_trans *trans,
ret = 0;
err:
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -849,14 +839,12 @@ int bch2_check_snapshots(struct bch_fs *c)
* We iterate backwards as checking/fixing the depth field requires that
* the parent's depth already be correct:
*/
- int ret = bch2_trans_run(c,
- for_each_btree_key_reverse_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_reverse_commit(trans, iter,
BTREE_ID_snapshots, POS_MAX,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_snapshot(trans, &iter, k)));
- bch_err_fn(c, ret);
- return ret;
+ check_snapshot(trans, &iter, k));
}
static int check_snapshot_exists(struct btree_trans *trans, u32 id)
@@ -980,8 +968,8 @@ static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct
int bch2_reconstruct_snapshots(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
- struct printbuf buf = PRINTBUF;
+ CLASS(btree_trans, trans)(c);
+ CLASS(printbuf, buf)();
struct snapshot_tree_reconstruct r = {};
int ret = 0;
@@ -1023,10 +1011,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
}
fsck_err:
err:
- bch2_trans_put(trans);
snapshot_tree_reconstruct_exit(&r);
- printbuf_exit(&buf);
- bch_err_fn(c, ret);
return ret;
}
@@ -1035,7 +1020,7 @@ int __bch2_check_key_has_snapshot(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret = 0;
enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);
@@ -1083,7 +1068,6 @@ int __bch2_check_key_has_snapshot(struct btree_trans *trans,
}
}
fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -1693,7 +1677,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
if (BCH_SNAPSHOT_DELETED(s.v))
return 0;
- mutex_lock(&d->progress_lock);
+ guard(mutex)(&d->progress_lock);
for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);
@@ -1720,7 +1704,6 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
darray_push(&d->delete_interior, n);
}
}
- mutex_unlock(&d->progress_lock);
return ret;
}
@@ -1825,10 +1808,12 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c)
if (!mutex_trylock(&d->lock))
return 0;
- if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
- goto out_unlock;
+ if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) {
+ mutex_unlock(&d->lock);
+ return 0;
+ }
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
/*
* For every snapshot node: If we have no live children and it's not
@@ -1848,11 +1833,10 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_snapshot_delete_nodes_to_text(&buf, d);
ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
- printbuf_exit(&buf);
if (ret)
goto err;
}
@@ -1895,19 +1879,16 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
err:
- mutex_lock(&d->progress_lock);
- darray_exit(&d->deleting_from_trees);
- darray_exit(&d->delete_interior);
- darray_exit(&d->delete_leaves);
- d->running = false;
- mutex_unlock(&d->progress_lock);
- bch2_trans_put(trans);
+ scoped_guard(mutex, &d->progress_lock) {
+ darray_exit(&d->deleting_from_trees);
+ darray_exit(&d->delete_interior);
+ darray_exit(&d->delete_leaves);
+ d->running = false;
+ }
bch2_recovery_pass_set_no_ratelimit(c, BCH_RECOVERY_PASS_check_snapshots);
-out_unlock:
+
mutex_unlock(&d->lock);
- if (!bch2_err_matches(ret, EROFS))
- bch_err_fn(c, ret);
return ret;
}
@@ -1952,11 +1933,10 @@ void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
return;
}
- mutex_lock(&d->progress_lock);
- bch2_snapshot_delete_nodes_to_text(out, d);
-
- bch2_bbpos_to_text(out, d->pos);
- mutex_unlock(&d->progress_lock);
+ scoped_guard(mutex, &d->progress_lock) {
+ bch2_snapshot_delete_nodes_to_text(out, d);
+ bch2_bbpos_to_text(out, d->pos);
+ }
}
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
@@ -2010,11 +1990,11 @@ int bch2_snapshots_read(struct bch_fs *c)
* Initializing the is_ancestor bitmaps requires ancestors to already be
* initialized - so mark in reverse:
*/
- int ret = bch2_trans_run(c,
- for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
POS_MAX, 0, k,
__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
- bch2_check_snapshot_needs_deletion(trans, k)));
+ bch2_check_snapshot_needs_deletion(trans, k));
bch_err_fn(c, ret);
/*
diff --git a/fs/bcachefs/snapshot_types.h b/fs/bcachefs/snapshot_types.h
index 0ab698f13e5c..a826c9c83c11 100644
--- a/fs/bcachefs/snapshot_types.h
+++ b/fs/bcachefs/snapshot_types.h
@@ -6,7 +6,7 @@
#include "darray.h"
#include "subvolume_types.h"
-typedef DARRAY(u32) snapshot_id_list;
+DEFINE_DARRAY_NAMED(snapshot_id_list, u32);
#define IS_ANCESTOR_BITMAP 128
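
DEFINE_DARRAY_NAMED() gives the dynamic array a proper named type instead of an anonymous typedef, presumably so cleanup classes and typed helpers can be attached to it; call sites keep using the generic darray API. A short sketch (id is hypothetical):

	snapshot_id_list s = {};

	int ret = darray_push(&s, id);	/* may reallocate; -ENOMEM on failure */
	if (ret)
		return ret;

	darray_for_each(s, i)		/* i is a u32 * into s.data */
		pr_info("snapshot %u\n", *i);

	darray_exit(&s);		/* frees s.data */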
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index d39fd4261e1b..3e08e55d2dc1 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -125,7 +125,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool need_commit = false;
int ret = 0;
@@ -183,7 +183,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans,
goto err;
if (!need_commit) {
- struct printbuf buf = PRINTBUF;
+ printbuf_reset(&buf);
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n",
@@ -198,7 +198,6 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans,
prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1);
#endif
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
ret = bch_err_throw(c, fsck_repair_unimplemented);
goto err;
}
@@ -207,7 +206,6 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans,
bch_err_throw(c, transaction_restart_nested);
err:
fsck_err:
- printbuf_exit(&buf);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
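
Note the fix folded into this hunk: the inner "struct printbuf buf = PRINTBUF" had shadowed the outer buffer; with the CLASS() conversion, reuse goes through printbuf_reset() so the allocation has a single owner. The pattern:

	CLASS(printbuf, buf)();			/* freed once, at scope exit */

	prt_printf(&buf, "first message");
	bch2_print_str(c, KERN_ERR, buf.buf);

	printbuf_reset(&buf);			/* reuse; don't redeclare */
	prt_printf(&buf, "second message");
	bch2_print_str(c, KERN_ERR, buf.buf);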
@@ -244,7 +242,7 @@ int bch2_str_hash_repair_key(struct btree_trans *trans,
bool *updated_before_k_pos)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bool free_snapshots_seen = false;
int ret = 0;
@@ -331,7 +329,6 @@ duplicate_entries:
out:
fsck_err:
bch2_trans_iter_exit(trans, dup_iter);
- printbuf_exit(&buf);
if (free_snapshots_seen)
darray_exit(&s->ids);
return ret;
@@ -346,7 +343,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_iter iter = {};
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
struct bkey_s_c k;
int ret = 0;
@@ -375,9 +372,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
goto bad_hash;
}
bch2_trans_iter_exit(trans, &iter);
-out:
fsck_err:
- printbuf_exit(&buf);
return ret;
bad_hash:
bch2_trans_iter_exit(trans, &iter);
@@ -386,7 +381,7 @@ bad_hash:
*/
ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info);
if (ret)
- goto out;
+ return ret;
if (fsck_err(trans, hash_table_key_wrong_offset,
"hash table key at wrong offset: should be at %llu\n%s",
@@ -396,5 +391,5 @@ bad_hash:
k_iter, hash_k,
&iter, bkey_s_c_null,
updated_before_k_pos);
- goto out;
+ return ret;
}
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index 353df662a9b5..2d2d6b22df88 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -17,7 +17,7 @@ static int bch2_subvolume_delete(struct btree_trans *, u32);
static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
{
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "missing subvolume %u", subvolid);
@@ -27,7 +27,6 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
BCH_RECOVERY_PASS_check_inodes, 0);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
return ret;
}
@@ -47,18 +46,18 @@ static int check_subvol(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_s_c_subvolume subvol;
struct btree_iter subvol_children_iter = {};
+ struct bch_subvolume subvol;
struct bch_snapshot snapshot;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
unsigned snapid;
int ret = 0;
if (k.k->type != KEY_TYPE_subvolume)
return 0;
- subvol = bkey_s_c_to_subvolume(k);
- snapid = le32_to_cpu(subvol.v->snapshot);
+ bkey_val_copy(&subvol, bkey_s_c_to_subvolume(k));
+ snapid = le32_to_cpu(subvol.snapshot);
ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
if (bch2_err_matches(ret, ENOENT))
@@ -67,19 +66,19 @@ static int check_subvol(struct btree_trans *trans,
if (ret)
return ret;
- if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
+ if (BCH_SUBVOLUME_UNLINKED(&subvol)) {
ret = bch2_subvolume_delete(trans, iter->pos.offset);
bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
return ret ?: bch_err_throw(c, transaction_restart_nested);
}
- if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
- subvol.v->fs_path_parent,
+ if (fsck_err_on(k.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
+ subvol.fs_path_parent,
trans, subvol_root_fs_path_parent_nonzero,
"root subvolume has nonzero fs_path_parent\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
struct bkey_i_subvolume *n =
- bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
+ bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto err;
@@ -87,7 +86,7 @@ static int check_subvol(struct btree_trans *trans,
n->v.fs_path_parent = 0;
}
- if (subvol.v->fs_path_parent) {
+ if (subvol.fs_path_parent) {
struct bpos pos = subvolume_children_pos(k);
struct bkey_s_c subvol_children_k =
@@ -111,16 +110,16 @@ static int check_subvol(struct btree_trans *trans,
struct bch_inode_unpacked inode;
ret = bch2_inode_find_by_inum_nowarn_trans(trans,
- (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
+ (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.inode) },
&inode);
if (!ret) {
- if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
+ if (fsck_err_on(inode.bi_subvol != k.k->p.offset,
trans, subvol_root_wrong_bi_subvol,
"subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
inode.bi_inum, inode.bi_snapshot,
- inode.bi_subvol, subvol.k->p.offset)) {
- inode.bi_subvol = subvol.k->p.offset;
- inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot);
+ inode.bi_subvol, k.k->p.offset)) {
+ inode.bi_subvol = k.k->p.offset;
+ inode.bi_snapshot = le32_to_cpu(subvol.snapshot);
ret = __bch2_fsck_write_inode(trans, &inode);
if (ret)
goto err;
@@ -128,8 +127,8 @@ static int check_subvol(struct btree_trans *trans,
} else if (bch2_err_matches(ret, ENOENT)) {
if (fsck_err(trans, subvol_to_missing_root,
"subvolume %llu points to missing subvolume root %llu:%u",
- k.k->p.offset, le64_to_cpu(subvol.v->inode),
- le32_to_cpu(subvol.v->snapshot))) {
+ k.k->p.offset, le64_to_cpu(subvol.inode),
+ le32_to_cpu(subvol.snapshot))) {
/*
* Recreate - any contents that are still disconnected
* will then get reattached under lost+found
@@ -137,10 +136,10 @@ static int check_subvol(struct btree_trans *trans,
bch2_inode_init_early(c, &inode);
bch2_inode_init_late(c, &inode, bch2_current_time(c),
0, 0, S_IFDIR|0700, 0, NULL);
- inode.bi_inum = le64_to_cpu(subvol.v->inode);
- inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot);
+ inode.bi_inum = le64_to_cpu(subvol.inode);
+ inode.bi_snapshot = le32_to_cpu(subvol.snapshot);
inode.bi_subvol = k.k->p.offset;
- inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent);
+ inode.bi_parent_subvol = le32_to_cpu(subvol.fs_path_parent);
ret = __bch2_fsck_write_inode(trans, &inode);
if (ret)
goto err;
@@ -149,8 +148,8 @@ static int check_subvol(struct btree_trans *trans,
goto err;
}
- if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
- u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
+ if (!BCH_SUBVOLUME_SNAP(&subvol)) {
+ u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.snapshot));
u32 snapshot_tree = bch2_snapshot_tree(c, snapshot_root);
struct bch_snapshot_tree st;
@@ -162,12 +161,12 @@ static int check_subvol(struct btree_trans *trans,
if (ret)
goto err;
- if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset,
+ if (fsck_err_on(le32_to_cpu(st.master_subvol) != k.k->p.offset,
trans, subvol_not_master_and_not_snapshot,
"subvolume %llu is not set as snapshot but is not master subvolume",
k.k->p.offset)) {
struct bkey_i_subvolume *s =
- bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
+ bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
ret = PTR_ERR_OR_ZERO(s);
if (ret)
goto err;
@@ -178,19 +177,16 @@ static int check_subvol(struct btree_trans *trans,
err:
fsck_err:
bch2_trans_iter_exit(trans, &subvol_children_iter);
- printbuf_exit(&buf);
return ret;
}
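
check_subvol() now snapshots the subvolume value onto the stack with bkey_val_copy() instead of keeping a bkey_s_c_subvolume, which points into the btree node; the likely motivation is that such pointers are invalidated when the transaction restarts or does further updates, while the stack copy stays valid. The pattern:

	struct bch_subvolume subvol;

	/* copy the value out of the btree key... */
	bkey_val_copy(&subvol, bkey_s_c_to_subvolume(k));

	/* ...so it can be used after k is invalidated: */
	unsigned snapid = le32_to_cpu(subvol.snapshot);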
int bch2_check_subvols(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter,
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_subvol(trans, &iter, k)));
- bch_err_fn(c, ret);
- return ret;
+ check_subvol(trans, &iter, k));
}
static int check_subvol_child(struct btree_trans *trans,
@@ -219,13 +215,11 @@ fsck_err:
int bch2_check_subvol_children(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter,
BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- check_subvol_child(trans, &iter, k)));
- bch_err_fn(c, ret);
- return 0;
+ check_subvol_child(trans, &iter, k));
}
/* Subvolumes: */
@@ -348,7 +342,8 @@ int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol)
{
- return bch2_trans_do(c, bch2_subvol_is_ro_trans(trans, subvol));
+ CLASS(btree_trans, trans)(c);
+ return lockrestart_do(trans, bch2_subvol_is_ro_trans(trans, subvol));
}
int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
@@ -514,18 +509,22 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
int ret = 0;
while (!ret) {
- mutex_lock(&c->snapshots_unlinked_lock);
- snapshot_id_list s = c->snapshots_unlinked;
- darray_init(&c->snapshots_unlinked);
- mutex_unlock(&c->snapshots_unlinked_lock);
+ snapshot_id_list s;
+
+ scoped_guard(mutex, &c->snapshots_unlinked_lock) {
+ s = c->snapshots_unlinked;
+ darray_init(&c->snapshots_unlinked);
+ }
if (!s.nr)
break;
bch2_evict_subvolume_inodes(c, &s);
+ CLASS(btree_trans, trans)(c);
+
darray_for_each(s, id) {
- ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
+ ret = bch2_subvolume_delete(trans, *id);
bch_err_msg(c, ret, "deleting subvolume %u", *id);
if (ret)
break;
@@ -549,10 +548,9 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans
struct bch_fs *c = trans->c;
int ret = 0;
- mutex_lock(&c->snapshots_unlinked_lock);
- if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
- ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
- mutex_unlock(&c->snapshots_unlinked_lock);
+ scoped_guard(mutex, &c->snapshots_unlinked_lock)
+ if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
+ ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
if (ret)
return ret;
@@ -677,7 +675,6 @@ int bch2_initialize_subvolumes(struct bch_fs *c)
struct bkey_i_snapshot_tree root_tree;
struct bkey_i_snapshot root_snapshot;
struct bkey_i_subvolume root_volume;
- int ret;
bkey_snapshot_tree_init(&root_tree.k_i);
root_tree.k.p.offset = 1;
@@ -698,11 +695,9 @@ int bch2_initialize_subvolumes(struct bch_fs *c)
root_volume.v.snapshot = cpu_to_le32(U32_MAX);
root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
- ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?:
+ return bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?:
bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0, 0) ?:
bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0, 0);
- bch_err_fn(c, ret);
- return ret;
}
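
The chained '?:' above is the usual bcachefs error-flow idiom: "a ?: b" yields a when a is nonzero, so each insert runs only if the previous one returned 0 and the first error short-circuits out. With hypothetical helpers:

	int ret = step_one()		/* 0 on success, negative error code */
		?: step_two()		/* runs only if step_one() returned 0 */
		?: step_three();	/* first nonzero return wins */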
static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
@@ -739,10 +734,9 @@ err:
/* set bi_subvol on root inode */
int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
{
- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- __bch2_fs_upgrade_for_subvolumes(trans));
- bch_err_fn(c, ret);
- return ret;
+ CLASS(btree_trans, trans)(c);
+ return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_fs_upgrade_for_subvolumes(trans));
}
void bch2_fs_subvolumes_init_early(struct bch_fs *c)
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 85e460d10e9d..40fa87ce1d09 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -68,23 +68,21 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta
int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
{
- int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) &&
- version <= c->sb.version_incompat_allowed)
- ? 0
- : -BCH_ERR_may_not_use_incompat_feature;
+ guard(mutex)(&c->sb_lock);
- mutex_lock(&c->sb_lock);
- if (!ret) {
+ if ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) &&
+ version <= c->sb.version_incompat_allowed) {
SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
bch2_write_super(c);
+ return 0;
} else {
darray_for_each(c->incompat_versions_requested, i)
if (version == *i)
- goto out;
+ return -BCH_ERR_may_not_use_incompat_feature;
darray_push(&c->incompat_versions_requested, version);
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "requested incompat feature ");
bch2_version_to_text(&buf, version);
prt_str(&buf, " currently not enabled, allowed up to ");
@@ -92,13 +90,8 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
prt_printf(&buf, "\n set version_upgrade=incompat to enable");
bch_notice(c, "%s", buf.buf);
- printbuf_exit(&buf);
+ return -BCH_ERR_may_not_use_incompat_feature;
}
-
-out:
- mutex_unlock(&c->sb_lock);
-
- return ret;
}
const char * const bch2_sb_fields[] = {
@@ -203,12 +196,11 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
if (new_bytes > max_bytes) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_bdevname(&buf, sb->bdev);
prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
pr_err("%s", buf.buf);
- printbuf_exit(&buf);
return -BCH_ERR_ENOSPC_sb;
}
}
@@ -783,8 +775,8 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts,
{
u64 offset = opt_get(*opts, sb);
struct bch_sb_layout layout;
- struct printbuf err = PRINTBUF;
- struct printbuf err2 = PRINTBUF;
+ CLASS(printbuf, err)();
+ CLASS(printbuf, err2)();
__le64 *i;
int ret;
#ifndef __KERNEL__
@@ -859,7 +851,6 @@ retry:
else
bch2_print_opts(opts, KERN_ERR "%s", err2.buf);
- printbuf_exit(&err2);
printbuf_reset(&err);
/*
@@ -925,15 +916,14 @@ got_super:
path, err.buf);
goto err_no_print;
}
-out:
- printbuf_exit(&err);
- return ret;
+
+ return 0;
err:
bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
path, err.buf);
err_no_print:
bch2_free_super(sb);
- goto out;
+ return ret;
}
int bch2_read_super(const char *path, struct bch_opts *opts,
@@ -1001,7 +991,12 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
null_nonce(), sb);
- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
+ /*
+ * blk-wbt.c throttles all writes except those that have both REQ_SYNC
+ * and REQ_IDLE set - make sure superblock writes aren't throttled:
+ */
+
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_IDLE|REQ_META);
bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
@@ -1019,7 +1014,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
int bch2_write_super(struct bch_fs *c)
{
struct closure *cl = &c->sb_write;
- struct printbuf err = PRINTBUF;
+ CLASS(printbuf, err)();
unsigned sb = 0, nr_wrote;
struct bch_devs_mask sb_written;
bool wrote, can_mount_without_written, can_mount_with_written;
@@ -1101,14 +1096,13 @@ int bch2_write_super(struct bch_fs *c)
goto out;
if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
prt_str(&buf, " > ");
bch2_version_to_text(&buf, bcachefs_metadata_version_current);
prt_str(&buf, ")");
bch2_fs_fatal_error(c, ": %s", buf.buf);
- printbuf_exit(&buf);
ret = bch_err_throw(c, sb_not_downgraded);
goto out;
}
@@ -1129,7 +1123,7 @@ int bch2_write_super(struct bch_fs *c)
continue;
if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_char(&buf, ' ');
prt_bdevname(&buf, ca->disk_sb.bdev);
prt_printf(&buf,
@@ -1144,12 +1138,10 @@ int bch2_write_super(struct bch_fs *c)
} else {
bch_err(c, "%s", buf.buf);
}
-
- printbuf_exit(&buf);
}
if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_char(&buf, ' ');
prt_bdevname(&buf, ca->disk_sb.bdev);
prt_printf(&buf,
@@ -1157,7 +1149,6 @@ int bch2_write_super(struct bch_fs *c)
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
bch2_fs_fatal_error(c, "%s", buf.buf);
- printbuf_exit(&buf);
ret = bch_err_throw(c, erofs_sb_err);
}
}
@@ -1219,19 +1210,17 @@ out:
darray_for_each(online_devices, ca)
enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super);
darray_exit(&online_devices);
- printbuf_exit(&err);
return ret;
}
void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
{
- mutex_lock(&c->sb_lock);
- if (!(c->sb.features & (1ULL << feat))) {
- c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat);
+ guard(mutex)(&c->sb_lock);
+ if (!(c->sb.features & BIT_ULL(feat))) {
+ c->disk_sb.sb->features[0] |= cpu_to_le64(BIT_ULL(feat));
bch2_write_super(c);
}
- mutex_unlock(&c->sb_lock);
}
/* Downgrade if superblock is at a higher version than currently supported: */
@@ -1279,11 +1268,12 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
void bch2_sb_upgrade_incompat(struct bch_fs *c)
{
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
+
if (c->sb.version == c->sb.version_incompat_allowed)
- goto unlock;
+ return;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "Now allowing incompatible features up to ");
bch2_version_to_text(&buf, c->sb.version);
@@ -1292,14 +1282,11 @@ void bch2_sb_upgrade_incompat(struct bch_fs *c)
prt_newline(&buf);
bch_notice(c, "%s", buf.buf);
- printbuf_exit(&buf);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version));
bch2_write_super(c);
-unlock:
- mutex_unlock(&c->sb_lock);
}
static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
@@ -1365,7 +1352,7 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
enum bch_validate_flags flags, struct printbuf *err)
{
unsigned type = le32_to_cpu(f->type);
- struct printbuf field_err = PRINTBUF;
+ CLASS(printbuf, field_err)();
const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
int ret;
@@ -1377,7 +1364,6 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
bch2_sb_field_to_text(err, sb, f);
}
- printbuf_exit(&field_err);
return ret;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 6980cd5b0ca8..0fc0b2221036 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -103,9 +103,32 @@ const char * const bch2_dev_write_refs[] = {
};
#undef x
-static void __bch2_print_str(struct bch_fs *c, const char *prefix,
- const char *str)
+static bool should_print_loglevel(struct bch_fs *c, const char *fmt)
{
+ unsigned loglevel_opt = c->loglevel ?: (c->opts.verbose ? 7 : 6);
+
+ bool have_soh = fmt[0] == KERN_SOH[0];
+ bool have_loglevel = have_soh && fmt[1] >= '0' && fmt[1] <= '9';
+
+ unsigned loglevel = have_loglevel
+ ? fmt[1] - '0'
+ : c->prev_loglevel;
+
+ if (have_loglevel)
+ c->prev_loglevel = loglevel;
+
+ return loglevel <= loglevel_opt;
+}
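
For reference, printk loglevels are encoded as KERN_SOH ("\001") plus an ASCII digit; KERN_ERR is KERN_SOH "3". So with the default threshold of 6 (or 7 when the verbose option is set), the function above behaves like:

	bch2_print_str(c, KERN_ERR,   "...");	/* level 3 <= 6: printed */
	bch2_print_str(c, KERN_DEBUG, "...");	/* level 7: only with verbose */

	/* a message without a KERN_SOH prefix inherits c->prev_loglevel */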
+
+void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
+{
+ if (!should_print_loglevel(c, prefix))
+ return;
+
+#ifndef __KERNEL__
+ prefix = "";
+#endif
+
#ifdef __KERNEL__
struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
@@ -114,12 +137,7 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix,
return;
}
#endif
- bch2_print_string_as_lines(KERN_ERR, str);
-}
-
-void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
-{
- __bch2_print_str(c, prefix, str);
+ bch2_print_string_as_lines(prefix, str);
}
__printf(2, 0)
@@ -149,6 +167,14 @@ void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
void __bch2_print(struct bch_fs *c, const char *fmt, ...)
{
+ if (!should_print_loglevel(c, fmt))
+ return;
+
+#ifndef __KERNEL__
+ if (fmt[0] == KERN_SOH[0])
+ fmt += 2;
+#endif
+
struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
va_list args;
@@ -241,14 +267,11 @@ static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid)
struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
{
- struct bch_fs *c;
+ guard(mutex)(&bch_fs_list_lock);
- mutex_lock(&bch_fs_list_lock);
- c = __bch2_uuid_to_fs(uuid);
+ struct bch_fs *c = __bch2_uuid_to_fs(uuid);
if (c)
closure_get(&c->cl);
- mutex_unlock(&bch_fs_list_lock);
-
return c;
}
@@ -392,9 +415,8 @@ void bch2_fs_read_only(struct bch_fs *c)
bch2_fs_mark_clean(c);
} else {
/* Make sure error counts/counters are persisted */
- mutex_lock(&c->sb_lock);
+ guard(mutex)(&c->sb_lock);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
bch_verbose(c, "done going read-only, filesystem not clean");
}
@@ -405,9 +427,8 @@ static void bch2_fs_read_only_work(struct work_struct *work)
struct bch_fs *c =
container_of(work, struct bch_fs, read_only_work);
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
bch2_fs_read_only(c);
- up_write(&c->state_lock);
}
static void bch2_fs_read_only_async(struct bch_fs *c)
@@ -487,11 +508,15 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
ret = bch2_fs_init_rw(c);
if (ret)
- goto err;
+ return ret;
ret = bch2_sb_members_v2_init(c);
if (ret)
- goto err;
+ return ret;
+
+ ret = bch2_fs_mark_dirty(c);
+ if (ret)
+ return ret;
clear_bit(BCH_FS_clean_shutdown, &c->flags);
@@ -510,15 +535,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
* overwriting whatever was there previously, and there must always be
* at least one non-flush write in the journal or recovery will fail:
*/
- spin_lock(&c->journal.lock);
- set_bit(JOURNAL_need_flush_write, &c->journal.flags);
- set_bit(JOURNAL_running, &c->journal.flags);
- bch2_journal_space_available(&c->journal);
- spin_unlock(&c->journal.lock);
+ scoped_guard(spinlock, &c->journal.lock) {
+ set_bit(JOURNAL_need_flush_write, &c->journal.flags);
+ set_bit(JOURNAL_running, &c->journal.flags);
+ bch2_journal_space_available(&c->journal);
+ }
- ret = bch2_fs_mark_dirty(c);
- if (ret)
- goto err;
+ /*
+ * Don't jump to our error path (which calls bch2_fs_read_only()) unless
+ * we successfully marked the filesystem dirty:
+ */
ret = bch2_journal_reclaim_start(&c->journal);
if (ret)
@@ -571,11 +597,8 @@ int bch2_fs_read_write(struct bch_fs *c)
int bch2_fs_read_write_early(struct bch_fs *c)
{
- down_write(&c->state_lock);
- int ret = __bch2_fs_read_write(c, true);
- up_write(&c->state_lock);
-
- return ret;
+ guard(rwsem_write)(&c->state_lock);
+ return __bch2_fs_read_write(c, true);
}
/* Filesystem startup/shutdown: */
@@ -673,9 +696,8 @@ void __bch2_fs_stop(struct bch_fs *c)
set_bit(BCH_FS_stopping, &c->flags);
- down_write(&c->state_lock);
- bch2_fs_read_only(c);
- up_write(&c->state_lock);
+ scoped_guard(rwsem_write, &c->state_lock)
+ bch2_fs_read_only(c);
for (unsigned i = 0; i < c->sb.nr_devices; i++) {
struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
@@ -711,9 +733,8 @@ void __bch2_fs_stop(struct bch_fs *c)
void bch2_fs_free(struct bch_fs *c)
{
- mutex_lock(&bch_fs_list_lock);
- list_del(&c->list);
- mutex_unlock(&bch_fs_list_lock);
+ scoped_guard(mutex, &bch_fs_list_lock)
+ list_del(&c->list);
closure_sync(&c->cl);
closure_debug_destroy(&c->cl);
@@ -775,21 +796,19 @@ static int bch2_fs_online(struct bch_fs *c)
return ret;
}
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
for_each_member_device(c, ca) {
ret = bch2_dev_sysfs_online(c, ca);
if (ret) {
bch_err(c, "error creating sysfs objects");
bch2_dev_put(ca);
- goto err;
+ return ret;
}
}
BUG_ON(!list_empty(&c->list));
list_add(&c->list, &bch_fs_list);
-err:
- up_write(&c->state_lock);
return ret;
}
@@ -826,8 +845,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
bch_sb_handles *sbs)
{
struct bch_fs *c;
- struct printbuf name = PRINTBUF;
unsigned i, iter_size;
+ CLASS(printbuf, name)();
int ret = 0;
c = kvmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
@@ -914,9 +933,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
if (ret)
goto err;
- mutex_lock(&c->sb_lock);
- ret = bch2_sb_to_fs(c, sb);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock)
+ ret = bch2_sb_to_fs(c, sb);
if (ret)
goto err;
@@ -968,7 +986,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
goto err;
strscpy(c->name, name.buf, sizeof(c->name));
- printbuf_exit(&name);
iter_size = sizeof(struct sort_iter) +
(btree_blocks(c) + 1) * 2 *
@@ -1060,12 +1077,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
&c->clock_journal_res,
(sizeof(struct jset_entry_clock) / sizeof(u64)) * 2);
- mutex_lock(&bch_fs_list_lock);
- ret = bch2_fs_online(c);
- mutex_unlock(&bch_fs_list_lock);
+ scoped_guard(mutex, &bch_fs_list_lock)
+ ret = bch2_fs_online(c);
if (ret)
goto err;
+
+ c->recovery_task = current;
out:
return c;
err:
@@ -1133,8 +1151,8 @@ static bool bch2_fs_may_start(struct bch_fs *c)
case BCH_DEGRADED_yes:
flags |= BCH_FORCE_IF_DEGRADED;
break;
- default:
- mutex_lock(&c->sb_lock);
+ default: {
+ guard(mutex)(&c->sb_lock);
for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
if (!bch2_member_exists(c->disk_sb.sb, i))
continue;
@@ -1143,13 +1161,11 @@ static bool bch2_fs_may_start(struct bch_fs *c)
if (!bch2_dev_is_online(ca) &&
(ca->mi.state == BCH_MEMBER_STATE_rw ||
- ca->mi.state == BCH_MEMBER_STATE_ro)) {
- mutex_unlock(&c->sb_lock);
+ ca->mi.state == BCH_MEMBER_STATE_ro))
return false;
- }
}
- mutex_unlock(&c->sb_lock);
break;
+ }
}
return bch2_have_enough_devs(c, c->online_devs, flags, true);
@@ -1160,6 +1176,8 @@ int bch2_fs_start(struct bch_fs *c)
time64_t now = ktime_get_real_seconds();
int ret = 0;
+ BUG_ON(test_bit(BCH_FS_started, &c->flags));
+
print_mount_opts(c);
if (c->cf_encoding)
@@ -1171,44 +1189,29 @@ int bch2_fs_start(struct bch_fs *c)
if (!bch2_fs_may_start(c))
return bch_err_throw(c, insufficient_devices_to_start);
- down_write(&c->state_lock);
- mutex_lock(&c->sb_lock);
+ scoped_guard(rwsem_write, &c->state_lock) {
+ guard(mutex)(&c->sb_lock);
+ if (!bch2_sb_field_get_minsize(&c->disk_sb, ext,
+ sizeof(struct bch_sb_field_ext) / sizeof(u64))) {
+ ret = bch_err_throw(c, ENOSPC_sb);
+ goto err;
+ }
- BUG_ON(test_bit(BCH_FS_started, &c->flags));
+ ret = bch2_sb_members_v2_init(c);
+ if (ret)
+ goto err;
- if (!bch2_sb_field_get_minsize(&c->disk_sb, ext,
- sizeof(struct bch_sb_field_ext) / sizeof(u64))) {
- mutex_unlock(&c->sb_lock);
- up_write(&c->state_lock);
- ret = bch_err_throw(c, ENOSPC_sb);
- goto err;
- }
+ scoped_guard(rcu)
+ for_each_online_member_rcu(c, ca) {
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
+ cpu_to_le64(now);
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ bch2_dev_allocator_add(c, ca);
+ }
- ret = bch2_sb_members_v2_init(c);
- if (ret) {
- mutex_unlock(&c->sb_lock);
- up_write(&c->state_lock);
- goto err;
+ bch2_recalc_capacity(c);
}
- scoped_guard(rcu)
- for_each_online_member_rcu(c, ca)
- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
- cpu_to_le64(now);
-
- /*
- * Dno't write superblock yet: recovery might have to downgrade
- */
- mutex_unlock(&c->sb_lock);
-
- scoped_guard(rcu)
- for_each_online_member_rcu(c, ca)
- if (ca->mi.state == BCH_MEMBER_STATE_rw)
- bch2_dev_allocator_add(c, ca);
- bch2_recalc_capacity(c);
- up_write(&c->state_lock);
-
- c->recovery_task = current;
ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
? bch2_fs_recovery(c)
: bch2_fs_initialize(c);
@@ -1229,13 +1232,12 @@ int bch2_fs_start(struct bch_fs *c)
set_bit(BCH_FS_started, &c->flags);
wake_up(&c->ro_ref_wait);
- down_write(&c->state_lock);
- if (c->opts.read_only)
- bch2_fs_read_only(c);
- else if (!test_bit(BCH_FS_rw, &c->flags))
- ret = bch2_fs_read_write(c);
- up_write(&c->state_lock);
-
+ scoped_guard(rwsem_write, &c->state_lock) {
+ if (c->opts.read_only)
+ bch2_fs_read_only(c);
+ else if (!test_bit(BCH_FS_rw, &c->flags))
+ ret = bch2_fs_read_write(c);
+ }
err:
if (ret)
bch_err_msg(c, ret, "starting filesystem");
@@ -1280,7 +1282,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
if (fs->sb->seq == sb->sb->seq &&
fs->sb->write_time != sb->sb->write_time) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "Split brain detected between ");
prt_bdevname(&buf, sb->bdev);
@@ -1305,7 +1307,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
prt_printf(&buf, "Not using older sb");
pr_err("%s", buf.buf);
- printbuf_exit(&buf);
if (!opts->no_splitbrain_check)
return -BCH_ERR_device_splitbrain;
@@ -1316,7 +1317,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
u64 seq_from_member = le64_to_cpu(sb->sb->seq);
if (seq_from_fs && seq_from_fs < seq_from_member) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
prt_str(&buf, "Split brain detected between ");
prt_bdevname(&buf, sb->bdev);
@@ -1338,7 +1339,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
}
pr_err("%s", buf.buf);
- printbuf_exit(&buf);
if (!opts->no_splitbrain_check)
return -BCH_ERR_device_splitbrain;
@@ -1553,18 +1553,16 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
struct bch_dev *ca = NULL;
if (bch2_fs_init_fault("dev_alloc"))
- goto err;
+ return bch_err_throw(c, ENOMEM_dev_alloc);
ca = __bch2_dev_alloc(c, &member);
if (!ca)
- goto err;
+ return bch_err_throw(c, ENOMEM_dev_alloc);
ca->fs = c;
bch2_dev_attach(c, ca, dev_idx);
return 0;
-err:
- return bch_err_throw(c, ENOMEM_dev_alloc);
}
static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
@@ -1579,7 +1577,10 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
if (get_capacity(sb->bdev->bd_disk) <
ca->mi.bucket_size * ca->mi.nbuckets) {
- bch_err(ca, "cannot online: device too small");
+ bch_err(ca, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)",
+ get_capacity(sb->bdev->bd_disk),
+ ca->mi.bucket_size * ca->mi.nbuckets,
+ ca->mi.nbuckets);
return bch_err_throw(ca->fs, device_size_too_small);
}
@@ -1590,10 +1591,9 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
if (ret)
return ret;
- struct printbuf name = PRINTBUF;
+ CLASS(printbuf, name)();
prt_bdevname(&name, sb->bdev);
strscpy(ca->name, name.buf, sizeof(ca->name));
- printbuf_exit(&name);
/* Commit: */
ca->disk_sb = *sb;
@@ -1725,7 +1725,6 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
- struct bch_member *m;
int ret = 0;
if (ca->mi.state == new_state)
@@ -1739,11 +1738,11 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
bch_notice(ca, "%s", bch2_member_states[new_state]);
- mutex_lock(&c->sb_lock);
- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
- SET_BCH_MEMBER_STATE(m, new_state);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_STATE(m, new_state);
+ bch2_write_super(c);
+ }
if (new_state == BCH_MEMBER_STATE_rw)
__bch2_dev_read_write(c, ca);
@@ -1756,26 +1755,20 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
- int ret;
-
- down_write(&c->state_lock);
- ret = __bch2_dev_set_state(c, ca, new_state, flags);
- up_write(&c->state_lock);
-
- return ret;
+ guard(rwsem_write)(&c->state_lock);
+ return __bch2_dev_set_state(c, ca, new_state, flags);
}
/* Device add/removal: */
int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
{
- struct bch_member *m;
unsigned dev_idx = ca->dev_idx, data;
bool fast_device_removal = !bch2_request_incompat_feature(c,
bcachefs_metadata_version_fast_device_removal);
int ret;
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
/*
* We consume a reference to ca->ref, regardless of whether we succeed
@@ -1842,20 +1835,17 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
data = bch2_dev_has_data(c, ca);
if (data) {
- struct printbuf data_has = PRINTBUF;
-
+ CLASS(printbuf, data_has)();
prt_bitflags(&data_has, __bch2_data_types, data);
bch_err(ca, "Remove failed, still has data (%s)", data_has.buf);
- printbuf_exit(&data_has);
ret = -EBUSY;
goto err;
}
__bch2_dev_offline(c, ca);
- mutex_lock(&c->sb_lock);
- rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock)
+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
#ifndef CONFIG_BCACHEFS_DEBUG
percpu_ref_kill(&ca->ref);
@@ -1871,25 +1861,23 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
* Free this device's slot in the bch_member array - all pointers to
* this device must be gone:
*/
- mutex_lock(&c->sb_lock);
- m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
+ scoped_guard(mutex, &c->sb_lock) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
- if (fast_device_removal)
- m->uuid = BCH_SB_MEMBER_DELETED_UUID;
- else
- memset(&m->uuid, 0, sizeof(m->uuid));
+ if (fast_device_removal)
+ m->uuid = BCH_SB_MEMBER_DELETED_UUID;
+ else
+ memset(&m->uuid, 0, sizeof(m->uuid));
- bch2_write_super(c);
+ bch2_write_super(c);
+ }
- mutex_unlock(&c->sb_lock);
- up_write(&c->state_lock);
return 0;
err:
if (test_bit(BCH_FS_rw, &c->flags) &&
ca->mi.state == BCH_MEMBER_STATE_rw &&
!enumerated_ref_is_zero(&ca->io_ref[READ]))
__bch2_dev_read_write(c, ca);
- up_write(&c->state_lock);
return ret;
}
@@ -1899,8 +1887,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
struct bch_opts opts = bch2_opts_empty();
struct bch_sb_handle sb = {};
struct bch_dev *ca = NULL;
- struct printbuf errbuf = PRINTBUF;
- struct printbuf label = PRINTBUF;
+ CLASS(printbuf, label)();
int ret = 0;
ret = bch2_read_super(path, &opts, &sb);
@@ -1919,12 +1906,12 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
}
if (list_empty(&c->list)) {
- mutex_lock(&bch_fs_list_lock);
- if (__bch2_uuid_to_fs(c->sb.uuid))
- ret = bch_err_throw(c, filesystem_uuid_already_open);
- else
- list_add(&c->list, &bch_fs_list);
- mutex_unlock(&bch_fs_list_lock);
+ scoped_guard(mutex, &bch_fs_list_lock) {
+ if (__bch2_uuid_to_fs(c->sb.uuid))
+ ret = bch_err_throw(c, filesystem_uuid_already_open);
+ else
+ list_add(&c->list, &bch_fs_list);
+ }
if (ret) {
bch_err(c, "filesystem UUID already open");
@@ -1946,101 +1933,95 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err;
- down_write(&c->state_lock);
- mutex_lock(&c->sb_lock);
- SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true);
+ scoped_guard(rwsem_write, &c->state_lock) {
+ scoped_guard(mutex, &c->sb_lock) {
+ SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true);
- ret = bch2_sb_from_fs(c, ca);
- bch_err_msg(c, ret, "setting up new superblock");
- if (ret)
- goto err_unlock;
+ ret = bch2_sb_from_fs(c, ca);
+ bch_err_msg(c, ret, "setting up new superblock");
+ if (ret)
+ goto err;
- if (dynamic_fault("bcachefs:add:no_slot"))
- goto err_unlock;
+ if (dynamic_fault("bcachefs:add:no_slot"))
+ goto err;
- ret = bch2_sb_member_alloc(c);
- if (ret < 0) {
- bch_err_msg(c, ret, "setting up new superblock");
- goto err_unlock;
- }
- unsigned dev_idx = ret;
- ret = 0;
+ ret = bch2_sb_member_alloc(c);
+ if (ret < 0) {
+ bch_err_msg(c, ret, "setting up new superblock");
+ goto err;
+ }
+ unsigned dev_idx = ret;
+ ret = 0;
- /* success: */
+ /* success: */
- dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds());
- *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi;
+ dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds());
+ *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi;
- ca->disk_sb.sb->dev_idx = dev_idx;
- bch2_dev_attach(c, ca, dev_idx);
+ ca->disk_sb.sb->dev_idx = dev_idx;
+ bch2_dev_attach(c, ca, dev_idx);
- if (BCH_MEMBER_GROUP(&dev_mi)) {
- ret = __bch2_dev_group_set(c, ca, label.buf);
- bch_err_msg(c, ret, "creating new label");
- if (ret)
- goto err_unlock;
- }
+ set_bit(ca->dev_idx, c->online_devs.d);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ if (BCH_MEMBER_GROUP(&dev_mi)) {
+ ret = __bch2_dev_group_set(c, ca, label.buf);
+ bch_err_msg(c, ret, "creating new label");
+ if (ret)
+ goto err_late;
+ }
- if (test_bit(BCH_FS_started, &c->flags)) {
- ret = bch2_dev_usage_init(ca, false);
- if (ret)
- goto err_late;
+ bch2_write_super(c);
+ }
- ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
- bch_err_msg(ca, ret, "marking new superblock");
+ ret = bch2_dev_usage_init(ca, false);
if (ret)
goto err_late;
- ret = bch2_fs_freespace_init(c);
- bch_err_msg(ca, ret, "initializing free space");
- if (ret)
- goto err_late;
+ if (test_bit(BCH_FS_started, &c->flags)) {
+ ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
+ bch_err_msg(ca, ret, "marking new superblock");
+ if (ret)
+ goto err_late;
- if (ca->mi.state == BCH_MEMBER_STATE_rw)
- __bch2_dev_read_write(c, ca);
+ ret = bch2_fs_freespace_init(c);
+ bch_err_msg(ca, ret, "initializing free space");
+ if (ret)
+ goto err_late;
- ret = bch2_dev_journal_alloc(ca, false);
- bch_err_msg(c, ret, "allocating journal");
- if (ret)
- goto err_late;
- }
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ __bch2_dev_read_write(c, ca);
- /*
- * We just changed the superblock UUID, invalidate cache and send a
- * uevent to update /dev/disk/by-uuid
- */
- invalidate_bdev(ca->disk_sb.bdev);
+ ret = bch2_dev_journal_alloc(ca, false);
+ bch_err_msg(c, ret, "allocating journal");
+ if (ret)
+ goto err_late;
+ }
- char uuid_str[37];
- snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid);
+ /*
+ * We just changed the superblock UUID, invalidate cache and send a
+ * uevent to update /dev/disk/by-uuid
+ */
+ invalidate_bdev(ca->disk_sb.bdev);
- char *envp[] = {
- "CHANGE=uuid",
- uuid_str,
- NULL,
- };
- kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp);
+ char uuid_str[37];
+ snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid);
- up_write(&c->state_lock);
+ char *envp[] = {
+ "CHANGE=uuid",
+ uuid_str,
+ NULL,
+ };
+ kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp);
+ }
out:
- printbuf_exit(&label);
- printbuf_exit(&errbuf);
bch_err_fn(c, ret);
return ret;
-
-err_unlock:
- mutex_unlock(&c->sb_lock);
- up_write(&c->state_lock);
err:
if (ca)
bch2_dev_free(ca);
bch2_free_super(&sb);
goto out;
err_late:
- up_write(&c->state_lock);
ca = NULL;
goto err;
}
@@ -2054,13 +2035,11 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
unsigned dev_idx;
int ret;
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
ret = bch2_read_super(path, &opts, &sb);
- if (ret) {
- up_write(&c->state_lock);
+ if (ret)
return ret;
- }
dev_idx = sb.sb->dev_idx;
@@ -2097,39 +2076,33 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
goto err;
}
- mutex_lock(&c->sb_lock);
- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
- cpu_to_le64(ktime_get_real_seconds());
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ scoped_guard(mutex, &c->sb_lock) {
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
+ cpu_to_le64(ktime_get_real_seconds());
+ bch2_write_super(c);
+ }
- up_write(&c->state_lock);
return 0;
err:
- up_write(&c->state_lock);
bch2_free_super(&sb);
return ret;
}
int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
{
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
if (!bch2_dev_is_online(ca)) {
bch_err(ca, "Already offline");
- up_write(&c->state_lock);
return 0;
}
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
bch_err(ca, "Cannot offline required disk");
- up_write(&c->state_lock);
return bch_err_throw(c, device_state_not_allowed);
}
__bch2_dev_offline(c, ca);
-
- up_write(&c->state_lock);
return 0;
}
@@ -2147,60 +2120,54 @@ static int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new
int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
- struct bch_member *m;
u64 old_nbuckets;
int ret = 0;
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
old_nbuckets = ca->mi.nbuckets;
if (nbuckets < ca->mi.nbuckets) {
bch_err(ca, "Cannot shrink yet");
- ret = -EINVAL;
- goto err;
+ return -EINVAL;
}
if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) {
bch_err(ca, "New device size too big (%llu greater than max %u)",
nbuckets, BCH_MEMBER_NBUCKETS_MAX);
- ret = bch_err_throw(c, device_size_too_big);
- goto err;
+ return bch_err_throw(c, device_size_too_big);
}
if (bch2_dev_is_online(ca) &&
get_capacity(ca->disk_sb.bdev->bd_disk) <
ca->mi.bucket_size * nbuckets) {
bch_err(ca, "New size larger than device");
- ret = bch_err_throw(c, device_size_too_small);
- goto err;
+ return bch_err_throw(c, device_size_too_small);
}
ret = bch2_dev_buckets_resize(c, ca, nbuckets);
bch_err_msg(ca, ret, "resizing buckets");
if (ret)
- goto err;
+ return ret;
ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
if (ret)
- goto err;
+ return ret;
- mutex_lock(&c->sb_lock);
- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
- m->nbuckets = cpu_to_le64(nbuckets);
+ scoped_guard(mutex, &c->sb_lock) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ m->nbuckets = cpu_to_le64(nbuckets);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ bch2_write_super(c);
+ }
if (ca->mi.freespace_initialized) {
ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets);
if (ret)
- goto err;
+ return ret;
}
bch2_recalc_capacity(c);
-err:
- up_write(&c->state_lock);
- return ret;
+ return 0;
}
int bch2_fs_resize_on_mount(struct bch_fs *c)
@@ -2218,26 +2185,24 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
if (ret) {
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_fs_resize_on_mount);
- up_write(&c->state_lock);
return ret;
}
- mutex_lock(&c->sb_lock);
- struct bch_member *m =
- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
- m->nbuckets = cpu_to_le64(new_nbuckets);
- SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false);
+ scoped_guard(mutex, &c->sb_lock) {
+ struct bch_member *m =
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ m->nbuckets = cpu_to_le64(new_nbuckets);
+ SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false);
- c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image));
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image));
+ bch2_write_super(c);
+ }
if (ca->mi.freespace_initialized) {
ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets);
if (ret) {
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_fs_resize_on_mount);
- up_write(&c->state_lock);
return ret;
}
}
@@ -2276,6 +2241,10 @@ static struct bch_fs *bdev_get_fs(struct block_device *bdev)
return c;
}
+DEFINE_CLASS(bdev_get_fs, struct bch_fs *,
+ bch2_ro_ref_put(_T), bdev_get_fs(bdev),
+ struct block_device *bdev);
+
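
DEFINE_CLASS() is the generic constructor/destructor pairing from <linux/cleanup.h>; roughly (a sketch, omitting the __cleanup attribute plumbing), the declaration above expands to:

	typedef struct bch_fs *class_bdev_get_fs_t;

	static inline void class_bdev_get_fs_destructor(struct bch_fs **p)
	{
		struct bch_fs *_T = *p;
		bch2_ro_ref_put(_T);			/* the _exit expression */
	}

	static inline struct bch_fs *
	class_bdev_get_fs_constructor(struct block_device *bdev)
	{
		struct bch_fs *t = bdev_get_fs(bdev);	/* the _init expression */
		return t;
	}

so CLASS(bdev_get_fs, c)(bdev) declares c, initializes it from bdev_get_fs(), and drops the ro ref when c leaves scope - hence the explicit bch2_ro_ref_put() calls deleted below.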
/* returns with ref on ca->ref */
static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bdev)
{
@@ -2287,7 +2256,7 @@ static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bd
static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
{
- struct bch_fs *c = bdev_get_fs(bdev);
+ CLASS(bdev_get_fs, c)(bdev);
if (!c)
return;
@@ -2301,48 +2270,45 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
down_read(&sb->s_umount);
}
- down_write(&c->state_lock);
+ guard(rwsem_write)(&c->state_lock);
+
struct bch_dev *ca = bdev_to_bch_dev(c, bdev);
- if (!ca)
- goto unlock;
+ if (ca) {
+ bool dev = bch2_dev_state_allowed(c, ca,
+ BCH_MEMBER_STATE_failed,
+ BCH_FORCE_IF_DEGRADED);
+
+ if (!dev && sb) {
+ if (!surprise)
+ sync_filesystem(sb);
+ shrink_dcache_sb(sb);
+ evict_inodes(sb);
+ }
- bool dev = bch2_dev_state_allowed(c, ca,
- BCH_MEMBER_STATE_failed,
- BCH_FORCE_IF_DEGRADED);
+ CLASS(printbuf, buf)();
+ __bch2_log_msg_start(ca->name, &buf);
- if (!dev && sb) {
- if (!surprise)
- sync_filesystem(sb);
- shrink_dcache_sb(sb);
- evict_inodes(sb);
- }
+ prt_printf(&buf, "offline from block layer");
- struct printbuf buf = PRINTBUF;
- __bch2_log_msg_start(ca->name, &buf);
+ if (dev) {
+ __bch2_dev_offline(c, ca);
+ } else {
+ bch2_journal_flush(&c->journal);
+ bch2_fs_emergency_read_only2(c, &buf);
+ }
- prt_printf(&buf, "offline from block layer");
+ bch2_print_str(c, KERN_ERR, buf.buf);
- if (dev) {
- __bch2_dev_offline(c, ca);
- } else {
- bch2_journal_flush(&c->journal);
- bch2_fs_emergency_read_only2(c, &buf);
+ bch2_dev_put(ca);
}
- bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
-
- bch2_dev_put(ca);
-unlock:
if (sb)
up_read(&sb->s_umount);
- up_write(&c->state_lock);
- bch2_ro_ref_put(c);
}
static void bch2_fs_bdev_sync(struct block_device *bdev)
{
- struct bch_fs *c = bdev_get_fs(bdev);
+ CLASS(bdev_get_fs, c)(bdev);
if (!c)
return;
@@ -2353,12 +2319,9 @@ static void bch2_fs_bdev_sync(struct block_device *bdev)
* unmounted - we only take this to avoid a warning in
* sync_filesystem:
*/
- down_read(&sb->s_umount);
+ guard(rwsem_read)(&sb->s_umount);
sync_filesystem(sb);
- up_read(&sb->s_umount);
}
-
- bch2_ro_ref_put(c);
}
const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
@@ -2380,7 +2343,6 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices,
bch_sb_handles sbs = {};
struct bch_fs *c = NULL;
struct bch_sb_handle *best = NULL;
- struct printbuf errbuf = PRINTBUF;
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -2435,15 +2397,12 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices,
if (ret)
goto err;
- down_write(&c->state_lock);
- darray_for_each(sbs, sb) {
- ret = bch2_dev_attach_bdev(c, sb);
- if (ret) {
- up_write(&c->state_lock);
- goto err;
+ scoped_guard(rwsem_write, &c->state_lock)
+ darray_for_each(sbs, sb) {
+ ret = bch2_dev_attach_bdev(c, sb);
+ if (ret)
+ goto err;
}
- }
- up_write(&c->state_lock);
if (!c->opts.nostart) {
ret = bch2_fs_start(c);
@@ -2454,7 +2413,6 @@ out:
darray_for_each(sbs, sb)
bch2_free_super(sb);
darray_exit(&sbs);
- printbuf_exit(&errbuf);
module_put(THIS_MODULE);
return c;
err_print:
@@ -2526,6 +2484,8 @@ static int bch2_param_get_static_key_t(char *buffer, const struct kernel_param *
return sprintf(buffer, "%c\n", static_key_enabled(key) ? 'N' : 'Y');
}
+/* this is unused in userspace - silence the warning */
+__maybe_unused
static const struct kernel_param_ops bch2_param_ops_static_key_t = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = bch2_param_set_static_key_t,
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 50cf51654265..bd3fa9c3372d 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -18,6 +18,7 @@
#include "btree_key_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
@@ -62,7 +63,7 @@ static ssize_t fn ## _to_text(struct printbuf *, \
static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
char *buf) \
{ \
- struct printbuf out = PRINTBUF; \
+ CLASS(printbuf, out)(); \
ssize_t ret = fn ## _to_text(&out, kobj, attr); \
\
if (out.pos && out.buf[out.pos - 1] != '\n') \
@@ -75,7 +76,6 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \
memcpy(buf, out.buf, ret); \
} \
- printbuf_exit(&out); \
return bch2_err_class(ret); \
} \
\
@@ -151,6 +151,7 @@ write_attribute(trigger_journal_flush);
write_attribute(trigger_journal_writes);
write_attribute(trigger_btree_cache_shrink);
write_attribute(trigger_btree_key_cache_shrink);
+write_attribute(trigger_btree_write_buffer_flush);
write_attribute(trigger_btree_updates);
write_attribute(trigger_freelist_wakeup);
write_attribute(trigger_recalc_capacity);
@@ -172,7 +173,9 @@ read_attribute(io_latency_read);
read_attribute(io_latency_write);
read_attribute(io_latency_stats_read);
read_attribute(io_latency_stats_write);
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
read_attribute(congested);
+#endif
read_attribute(btree_write_stats);
@@ -233,14 +236,13 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
size_t ret = 0;
struct btree *b;
- mutex_lock(&bc->lock);
+ guard(mutex)(&bc->lock);
list_for_each_entry(b, &bc->live[0].list, list)
ret += btree_buf_bytes(b);
list_for_each_entry(b, &bc->live[1].list, list)
ret += btree_buf_bytes(b);
list_for_each_entry(b, &bc->freeable, list)
ret += btree_buf_bytes(b);
- mutex_unlock(&bc->lock);
return ret;
}
@@ -539,6 +541,11 @@ STORE(bch2_fs)
c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc);
}
+ if (attr == &sysfs_trigger_btree_write_buffer_flush)
+ bch2_trans_do(c,
+ (bch2_btree_write_buffer_flush_sync(trans),
+ bch2_trans_begin(trans)));
+
if (attr == &sysfs_trigger_gc)
bch2_gc_gens(c);
@@ -563,9 +570,8 @@ STORE(bch2_fs)
closure_wake_up(&c->freelist_wait);
if (attr == &sysfs_trigger_recalc_capacity) {
- down_read(&c->state_lock);
+ guard(rwsem_read)(&c->state_lock);
bch2_recalc_capacity(c);
- up_read(&c->state_lock);
}
if (attr == &sysfs_trigger_delete_dead_snapshots)
@@ -710,6 +716,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_journal_writes,
&sysfs_trigger_btree_cache_shrink,
&sysfs_trigger_btree_key_cache_shrink,
+ &sysfs_trigger_btree_write_buffer_flush,
&sysfs_trigger_btree_updates,
&sysfs_trigger_freelist_wakeup,
&sysfs_trigger_recalc_capacity,
@@ -942,9 +949,10 @@ SHOW(bch2_dev)
if (attr == &sysfs_io_latency_stats_write)
bch2_time_stats_to_text(out, &ca->io_latency[WRITE].stats);
- sysfs_printf(congested, "%u%%",
- clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
- * 100 / CONGESTED_MAX);
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+ if (attr == &sysfs_congested)
+ bch2_dev_congested_to_text(out, ca);
+#endif
if (attr == &sysfs_alloc_debug)
bch2_dev_alloc_debug_to_text(out, ca);
@@ -1015,7 +1023,9 @@ struct attribute *bch2_dev_files[] = {
&sysfs_io_latency_write,
&sysfs_io_latency_stats_read,
&sysfs_io_latency_stats_write,
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
&sysfs_congested,
+#endif
&sysfs_read_fua_test,
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index 782a05fe7656..ea27df30cfcb 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -31,7 +31,7 @@ static void delete_test_keys(struct bch_fs *c)
static int test_delete(struct bch_fs *c, u64 nr)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bkey_i_cookie k;
int ret;
@@ -66,13 +66,12 @@ static int test_delete(struct bch_fs *c, u64 nr)
goto err;
err:
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return ret;
}
static int test_delete_written(struct bch_fs *c, u64 nr)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bkey_i_cookie k;
int ret;
@@ -101,7 +100,6 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
goto err;
err:
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return ret;
}
@@ -130,13 +128,14 @@ static int test_iterate(struct bch_fs *c, u64 nr)
pr_info("iterating forwards");
i = 0;
- ret = bch2_trans_run(c,
- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
- 0, k, ({
+ CLASS(btree_trans, trans)(c);
+
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
BUG_ON(k.k->p.offset != i++);
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating forwards");
if (ret)
return ret;
@@ -145,12 +144,11 @@ static int test_iterate(struct bch_fs *c, u64 nr)
pr_info("iterating backwards");
- ret = bch2_trans_run(c,
- for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs,
+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs,
SPOS(0, U64_MAX, U32_MAX), 0, k, ({
BUG_ON(k.k->p.offset != --i);
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating backwards");
if (ret)
return ret;
@@ -185,14 +183,15 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
pr_info("iterating forwards");
i = 0;
- ret = bch2_trans_run(c,
- for_each_btree_key_max(trans, iter, BTREE_ID_extents,
- SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
- 0, k, ({
+ CLASS(btree_trans, trans)(c);
+
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
BUG_ON(bkey_start_offset(k.k) != i);
i = k.k->p.offset;
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating forwards");
if (ret)
return ret;
@@ -201,13 +200,12 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
pr_info("iterating backwards");
- ret = bch2_trans_run(c,
- for_each_btree_key_reverse(trans, iter, BTREE_ID_extents,
+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents,
SPOS(0, U64_MAX, U32_MAX), 0, k, ({
BUG_ON(k.k->p.offset != i);
i = bkey_start_offset(k.k);
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating backwards");
if (ret)
return ret;
@@ -241,14 +239,15 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
pr_info("iterating forwards");
i = 0;
- ret = bch2_trans_run(c,
- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
- 0, k, ({
+ CLASS(btree_trans, trans)(c);
+
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
BUG_ON(k.k->p.offset != i);
i += 2;
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating forwards");
if (ret)
return ret;
@@ -258,10 +257,9 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
pr_info("iterating forwards by slots");
i = 0;
- ret = bch2_trans_run(c,
- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
- BTREE_ITER_slots, k, ({
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ BTREE_ITER_slots, k, ({
if (i >= nr * 2)
break;
@@ -270,7 +268,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
i++;
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating forwards by slots");
return ret;
}
@@ -301,15 +299,16 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
pr_info("iterating forwards");
i = 0;
- ret = bch2_trans_run(c,
- for_each_btree_key_max(trans, iter, BTREE_ID_extents,
- SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
- 0, k, ({
+ CLASS(btree_trans, trans)(c);
+
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
BUG_ON(bkey_start_offset(k.k) != i + 8);
BUG_ON(k.k->size != 8);
i += 16;
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating forwards");
if (ret)
return ret;
@@ -319,10 +318,9 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
pr_info("iterating forwards by slots");
i = 0;
- ret = bch2_trans_run(c,
- for_each_btree_key_max(trans, iter, BTREE_ID_extents,
- SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
- BTREE_ITER_slots, k, ({
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ BTREE_ITER_slots, k, ({
if (i == nr)
break;
BUG_ON(bkey_deleted(k.k) != !(i % 16));
@@ -331,7 +329,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
BUG_ON(k.k->size != 8);
i = k.k->p.offset;
0;
- })));
+ }));
bch_err_msg(c, ret, "error iterating forwards by slots");
return ret;
}
@@ -344,7 +342,7 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
{
delete_test_keys(c);
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bkey_s_c k;
@@ -358,7 +356,6 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
BUG_ON(k.k);
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return 0;
}
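
The hunks above replace explicit bch2_trans_get()/bch2_trans_put() pairing
with CLASS(btree_trans, trans)(c), which ties the transaction's lifetime to
the enclosing scope. A minimal userspace sketch of the cleanup-attribute
mechanism behind CLASS() and guard() from linux/cleanup.h; the resource
type and the acquire/release helpers here are invented for illustration:

    /* Sketch only: scope-based cleanup as used by CLASS(). */
    #include <stdio.h>
    #include <stdlib.h>

    struct resource { int id; };

    static struct resource *acquire(int id)
    {
            struct resource *r = malloc(sizeof(*r));

            if (r)
                    r->id = id;
            return r;
    }

    /* Called automatically when the annotated variable leaves scope. */
    static void release(struct resource **rp)
    {
            free(*rp);
    }

    /* Poor man's CLASS(): declare + attach destructor in one statement. */
    #define SCOPED_RESOURCE(name, id) \
            struct resource *name __attribute__((cleanup(release))) = acquire(id)

    int main(void)
    {
            SCOPED_RESOURCE(r, 1);

            if (!r)
                    return 1;       /* release() still runs on this early return */
            printf("using resource %d\n", r->id);
            return 0;               /* no explicit put needed */
    }

The payoff in the hunks above is that early returns, such as the ones in
rand_insert() further below, no longer need an explicit put on every path.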
@@ -366,7 +363,7 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
{
delete_test_keys(c);
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bkey_s_c k;
@@ -380,7 +377,6 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
BUG_ON(k.k);
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return 0;
}
@@ -392,15 +388,13 @@ static int insert_test_extent(struct bch_fs *c,
u64 start, u64 end)
{
struct bkey_i_cookie k;
- int ret;
-
bkey_cookie_init(&k.k_i);
k.k_i.k.p.offset = end;
k.k_i.k.p.snapshot = U32_MAX;
k.k_i.k.size = end - start;
k.k_i.k.bversion.lo = test_version++;
- ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0);
+ int ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0);
bch_err_fn(c, ret);
return ret;
}
@@ -446,15 +440,14 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr)
static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid)
{
struct bkey_i_cookie k;
- int ret;
-
bkey_cookie_init(&k.k_i);
k.k_i.k.p.inode = inum;
k.k_i.k.p.offset = start + len;
k.k_i.k.p.snapshot = snapid;
k.k_i.k.size = len;
- ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+ CLASS(btree_trans, trans)(c);
+ int ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i,
BTREE_UPDATE_internal_snapshot_node));
bch_err_fn(c, ret);
@@ -477,7 +470,6 @@ static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
/* Test skipping over keys in unrelated snapshots: */
static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
{
- struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_i_cookie cookie;
@@ -489,7 +481,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
if (ret)
return ret;
- trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, snapid_lo), 0);
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
@@ -497,28 +489,28 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
BUG_ON(k.k->p.snapshot != U32_MAX);
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return ret;
}
static int test_snapshots(struct bch_fs *c, u64 nr)
{
struct bkey_i_cookie cookie;
- u32 snapids[2];
- u32 snapid_subvols[2] = { 1, 1 };
- int ret;
-
bkey_cookie_init(&cookie.k_i);
cookie.k.p.snapshot = U32_MAX;
- ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0);
+
+ int ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0);
if (ret)
return ret;
- ret = bch2_trans_commit_do(c, NULL, NULL, 0,
- bch2_snapshot_node_create(trans, U32_MAX,
- snapids,
- snapid_subvols,
- 2));
+ u32 snapids[2];
+ u32 snapid_subvols[2] = { 1, 1 };
+
+ CLASS(btree_trans, trans)(c);
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_create(trans, U32_MAX,
+ snapids,
+ snapid_subvols,
+ 2));
if (ret)
return ret;
@@ -542,42 +534,37 @@ static u64 test_rand(void)
static int rand_insert(struct bch_fs *c, u64 nr)
{
- struct btree_trans *trans = bch2_trans_get(c);
- struct bkey_i_cookie k;
- int ret = 0;
- u64 i;
+ CLASS(btree_trans, trans)(c);
- for (i = 0; i < nr; i++) {
+ for (u64 i = 0; i < nr; i++) {
+ struct bkey_i_cookie k;
bkey_cookie_init(&k.k_i);
k.k.p.offset = test_rand();
k.k.p.snapshot = U32_MAX;
- ret = commit_do(trans, NULL, NULL, 0,
+ int ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0));
if (ret)
- break;
+ return ret;
}
- bch2_trans_put(trans);
- return ret;
+ return 0;
}
static int rand_insert_multi(struct bch_fs *c, u64 nr)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bkey_i_cookie k[8];
- int ret = 0;
unsigned j;
- u64 i;
- for (i = 0; i < nr; i += ARRAY_SIZE(k)) {
+ for (u64 i = 0; i < nr; i += ARRAY_SIZE(k)) {
for (j = 0; j < ARRAY_SIZE(k); j++) {
bkey_cookie_init(&k[j].k_i);
k[j].k.p.offset = test_rand();
k[j].k.p.snapshot = U32_MAX;
}
- ret = commit_do(trans, NULL, NULL, 0,
+ int ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?:
bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?:
bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?:
@@ -587,25 +574,23 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?:
bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0));
if (ret)
- break;
+ return ret;
}
- bch2_trans_put(trans);
- return ret;
+ return 0;
}
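
rand_insert_multi() chains the eight inserts with the GNU "a ?: b" operator:
each call runs only if everything before it returned 0, and the first nonzero
error code is what commit_do() commits or aborts on. A small stand-alone
demonstration of the idiom (step1..step3 are hypothetical):

    #include <stdio.h>

    static int step1(void) { return 0; }
    static int step2(void) { return -5; }  /* pretend this step fails */
    static int step3(void) { return 0; }

    int main(void)
    {
            /* a ?: b yields a if nonzero, else b, evaluating left to right */
            int ret = step1() ?:
                      step2() ?:
                      step3();      /* never evaluated: step2() failed */

            printf("ret = %d\n", ret);      /* prints -5 */
            return 0;
    }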
static int rand_lookup(struct bch_fs *c, u64 nr)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
- u64 i;
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
- for (i = 0; i < nr; i++) {
+ for (u64 i = 0; i < nr; i++) {
bch2_btree_iter_set_pos(trans, &iter, SPOS(0, test_rand(), U32_MAX));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
@@ -615,7 +600,6 @@ static int rand_lookup(struct bch_fs *c, u64 nr)
}
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return ret;
}
@@ -646,17 +630,16 @@ static int rand_mixed_trans(struct btree_trans *trans,
static int rand_mixed(struct bch_fs *c, u64 nr)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct btree_iter iter;
struct bkey_i_cookie cookie;
int ret = 0;
- u64 i, rand;
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
- for (i = 0; i < nr; i++) {
- rand = test_rand();
+ for (u64 i = 0; i < nr; i++) {
+ u64 rand = test_rand();
ret = commit_do(trans, NULL, NULL, 0,
rand_mixed_trans(trans, &iter, &cookie, i, rand));
if (ret)
@@ -664,7 +647,6 @@ static int rand_mixed(struct bch_fs *c, u64 nr)
}
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
return ret;
}
@@ -692,31 +674,27 @@ err:
static int rand_delete(struct bch_fs *c, u64 nr)
{
- struct btree_trans *trans = bch2_trans_get(c);
- int ret = 0;
- u64 i;
+ CLASS(btree_trans, trans)(c);
- for (i = 0; i < nr; i++) {
+ for (u64 i = 0; i < nr; i++) {
struct bpos pos = SPOS(0, test_rand(), U32_MAX);
- ret = commit_do(trans, NULL, NULL, 0,
+ int ret = commit_do(trans, NULL, NULL, 0,
__do_delete(trans, pos));
if (ret)
- break;
+ return ret;
}
- bch2_trans_put(trans);
- return ret;
+ return 0;
}
static int seq_insert(struct bch_fs *c, u64 nr)
{
struct bkey_i_cookie insert;
-
bkey_cookie_init(&insert.k_i);
- return bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX),
BTREE_ITER_slots|BTREE_ITER_intent, k,
NULL, NULL, 0, ({
@@ -724,22 +702,22 @@ static int seq_insert(struct bch_fs *c, u64 nr)
break;
insert.k.p = iter.pos;
bch2_trans_update(trans, &iter, &insert.k_i, 0);
- })));
+ }));
}
static int seq_lookup(struct bch_fs *c, u64 nr)
{
- return bch2_trans_run(c,
- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k,
- 0));
+ 0);
}
static int seq_overwrite(struct bch_fs *c, u64 nr)
{
- return bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
+ CLASS(btree_trans, trans)(c);
+ return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX),
BTREE_ITER_intent, k,
NULL, NULL, 0, ({
@@ -747,7 +725,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr)
bkey_reassemble(&u.k_i, k);
bch2_trans_update(trans, &iter, &u.k_i, 0);
- })));
+ }));
}
static int seq_delete(struct bch_fs *c, u64 nr)
@@ -808,8 +786,8 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
{
struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads };
char name_buf[20];
- struct printbuf nr_buf = PRINTBUF;
- struct printbuf per_sec_buf = PRINTBUF;
+ CLASS(printbuf, nr_buf)();
+ CLASS(printbuf, per_sec_buf)();
unsigned i;
u64 time;
@@ -883,8 +861,6 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
div_u64(time, NSEC_PER_SEC),
div_u64(time * nr_threads, nr),
per_sec_buf.buf);
- printbuf_exit(&per_sec_buf);
- printbuf_exit(&nr_buf);
return j.ret;
}
diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c
index 314a24d15d4e..c2eae0ab7765 100644
--- a/fs/bcachefs/thread_with_file.c
+++ b/fs/bcachefs/thread_with_file.c
@@ -60,8 +60,7 @@ int bch2_run_thread_with_file(struct thread_with_file *thr,
err:
if (fd >= 0)
put_unused_fd(fd);
- if (thr->task)
- kthread_stop(thr->task);
+ kthread_stop(thr->task);
return ret;
}
@@ -185,23 +184,23 @@ static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubu
break;
}
- spin_lock(&buf->lock);
- size_t makeroom = b;
- if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr))
- makeroom = min_t(ssize_t, makeroom,
- max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr,
- 0));
- darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT);
-
- b = min(len, darray_room(buf->buf));
-
- if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) {
- buf->buf.nr += b;
- ubuf += b;
- len -= b;
- copied += b;
+ scoped_guard(spinlock, &buf->lock) {
+ size_t makeroom = b;
+ if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr))
+ makeroom = min_t(ssize_t, makeroom,
+ max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr,
+ 0));
+ darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT);
+
+ b = min(len, darray_room(buf->buf));
+
+ if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) {
+ buf->buf.nr += b;
+ ubuf += b;
+ len -= b;
+ copied += b;
+ }
}
- spin_unlock(&buf->lock);
if (b) {
wake_up(&buf->wait);
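
scoped_guard(spinlock, &buf->lock) pairs the lock acquisition with an unlock
that runs on every exit from the braced block, so no early path can leak the
lock the way an open-coded spin_unlock() could. A rough pthread analogue of
the pattern, assuming only the GCC/Clang cleanup attribute (the real macros
live in linux/cleanup.h):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int shared;

    static void unlock(pthread_mutex_t **m)
    {
            pthread_mutex_unlock(*m);
    }

    /* Runs the body once; unlock() fires on any exit from the block. */
    #define scoped_lock(m) \
            for (pthread_mutex_t *_g __attribute__((cleanup(unlock))) = \
                            (pthread_mutex_lock(m), (m)), *_once = (m); \
                 _once; _once = NULL)

    int main(void)
    {
            scoped_lock(&lock) {
                    shared++;       /* lock held only inside this block */
            }
            printf("shared = %d\n", shared);
            return 0;
    }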
@@ -349,14 +348,15 @@ int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *ubuf, size_t le
if (stdio->done)
return -1;
- spin_lock(&buf->lock);
- int ret = min(len, buf->buf.nr);
- buf->buf.nr -= ret;
- memcpy(ubuf, buf->buf.data, ret);
- memmove(buf->buf.data,
- buf->buf.data + ret,
- buf->buf.nr);
- spin_unlock(&buf->lock);
+ int ret;
+ scoped_guard(spinlock, &buf->lock) {
+ ret = min(len, buf->buf.nr);
+ buf->buf.nr -= ret;
+ memcpy(ubuf, buf->buf.data, ret);
+ memmove(buf->buf.data,
+ buf->buf.data + ret,
+ buf->buf.nr);
+ }
wake_up(&buf->wait);
return ret;
diff --git a/fs/bcachefs/time_stats.c b/fs/bcachefs/time_stats.c
index 2c34fe4be912..7b5fa44807d7 100644
--- a/fs/bcachefs/time_stats.c
+++ b/fs/bcachefs/time_stats.c
@@ -138,10 +138,8 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
GFP_ATOMIC);
spin_unlock_irqrestore(&stats->lock, flags);
} else {
- struct time_stat_buffer *b;
-
- preempt_disable();
- b = this_cpu_ptr(stats->buffer);
+ guard(preempt)();
+ struct time_stat_buffer *b = this_cpu_ptr(stats->buffer);
BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
b->entries[b->nr++] = (struct time_stat_buffer_entry) {
@@ -151,7 +149,6 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
time_stats_clear_buffer(stats, b);
- preempt_enable();
}
}
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index b5dae1145afa..3776a1403104 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -292,23 +292,9 @@ DEFINE_EVENT(bio, io_read_promote,
TP_ARGS(bio)
);
-TRACE_EVENT(io_read_nopromote,
- TP_PROTO(struct bch_fs *c, int ret),
- TP_ARGS(c, ret),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __array(char, ret, 32 )
- ),
-
- TP_fast_assign(
- __entry->dev = c->dev;
- strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
- ),
-
- TP_printk("%d,%d ret %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ret)
+DEFINE_EVENT(fs_str, io_read_nopromote,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
DEFINE_EVENT(bio, io_read_bounce,
@@ -1330,6 +1316,11 @@ DEFINE_EVENT(fs_str, data_update,
TP_ARGS(c, str)
);
+DEFINE_EVENT(fs_str, data_update_done_no_rw_devs,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
DEFINE_EVENT(fs_str, io_move_pred,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
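
The io_read_nopromote change above retires a bespoke TRACE_EVENT in favour of
the existing fs_str event class, which takes a preformatted string, and the
new data_update_done_no_rw_devs event then costs only a name. A toy sketch of
the class/event split (invented types, not the real tracing machinery):

    #include <stdio.h>

    struct fs;      /* stand-in for struct bch_fs */

    /* The "class": one handler shared by every string-style event. */
    static void fs_str_class(const char *event, struct fs *c, const char *str)
    {
            (void)c;
            printf("%s: %s\n", event, str);
    }

    /* The "DEFINE_EVENT": each event only contributes its name. */
    #define DEFINE_FS_STR_EVENT(name) \
            static void trace_##name(struct fs *c, const char *str) \
            { fs_str_class(#name, c, str); }

    DEFINE_FS_STR_EVENT(io_read_nopromote)
    DEFINE_FS_STR_EVENT(data_update_done_no_rw_devs)

    int main(void)
    {
            trace_io_read_nopromote(NULL, "ret -EROFS");
            trace_data_update_done_no_rw_devs(NULL, "no rw devs");
            return 0;
    }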
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 7a4436fd4441..2ded7f3c835f 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -321,11 +321,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp)
{
- bch_stacktrace stack = { 0 };
+ CLASS(bch_stacktrace, stack)();
int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp);
bch2_prt_backtrace(out, &stack);
- darray_exit(&stack);
return ret;
}
@@ -982,9 +981,8 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
int cpu;
/* access to pcpu vars has to be blocked by other locking */
- preempt_disable();
- ret = this_cpu_ptr(p);
- preempt_enable();
+ scoped_guard(preempt)
+ ret = this_cpu_ptr(p);
for_each_possible_cpu(cpu) {
u64 *i = per_cpu_ptr(p, cpu);
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 6488f098d140..52ac8230be9f 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -216,7 +216,8 @@ void bch2_prt_u64_base2(struct printbuf *, u64);
void bch2_print_string_as_lines(const char *, const char *);
-typedef DARRAY(unsigned long) bch_stacktrace;
+DEFINE_DARRAY_NAMED(bch_stacktrace, unsigned long);
+
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);
void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *);
int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned, gfp_t);
@@ -732,6 +733,13 @@ static inline bool test_bit_le64(size_t bit, __le64 *addr)
return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0;
}
+static inline bool __test_and_set_bit_le64(size_t bit, __le64 *addr)
+{
+ bool ret = test_bit_le64(bit, addr);
+ __set_bit_le64(bit, addr);
+ return ret;
+}
+
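
__test_and_set_bit_le64() reports the previous value of the bit and then sets
it; like the kernel's other double-underscore bit helpers it is non-atomic.
A hedged userspace equivalent using glibc's <endian.h>, to show where the
little-endian conversions sit:

    #include <endian.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Bit N lives in word N/64, at bit N%64 of the little-endian value. */
    static bool test_bit_le64(size_t bit, const uint64_t *addr)
    {
            return (le64toh(addr[bit / 64]) >> (bit % 64)) & 1;
    }

    static bool test_and_set_bit_le64(size_t bit, uint64_t *addr)
    {
            bool old = test_bit_le64(bit, addr);

            addr[bit / 64] = htole64(le64toh(addr[bit / 64]) |
                                     (1ULL << (bit % 64)));
            return old;
    }

    int main(void)
    {
            uint64_t bitmap[2] = { 0, 0 };  /* 128 bits, stored little-endian */

            printf("%d\n", test_and_set_bit_le64(70, bitmap));  /* 0: was clear */
            printf("%d\n", test_and_set_bit_le64(70, bitmap));  /* 1: now set */
            return 0;
    }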
static inline void memcpy_swab(void *_dst, void *_src, size_t len)
{
u8 *dst = _dst + len;
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 627f153798c6..903e20cd34fa 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -313,8 +313,8 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
u64 offset = 0, inum = inode->ei_inode.bi_inum;
- int ret = bch2_trans_run(c,
- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs,
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs,
POS(inum, offset),
POS(inum, U64_MAX),
inode->ei_inum.subvol, 0, k, ({
@@ -322,7 +322,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
continue;
bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf);
- }))) ?:
+ })) ?:
bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false) ?:
bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true);
@@ -335,9 +335,10 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler,
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- int ret = bch2_trans_do(c,
- bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags));
+ CLASS(btree_trans, trans)(c);
+ int ret = lockrestart_do(trans,
+ bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags));
if (ret < 0 && bch2_err_matches(ret, ENOENT))
ret = -ENODATA;
@@ -356,12 +357,12 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
struct bch_inode_unpacked inode_u;
int ret;
- ret = bch2_trans_run(c,
- commit_do(trans, NULL, NULL, 0,
+ CLASS(btree_trans, trans)(c);
+ ret = commit_do(trans, NULL, NULL, 0,
bch2_xattr_set(trans, inode_inum(inode), &inode_u,
&hash, name, value, size,
handler->flags, flags)) ?:
- (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0));
+ (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0);
return bch2_err_class(ret);
}
@@ -418,7 +419,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
bch2_inode_opts_to_opts(&inode->ei_inode);
const struct bch_option *opt;
int id, inode_opt_id;
- struct printbuf out = PRINTBUF;
int ret;
u64 v;
@@ -439,6 +439,7 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
!(inode->ei_inode.bi_fields_set & (1 << inode_opt_id)))
return -ENODATA;
+ CLASS(printbuf, out)();
v = bch2_opt_get_by_id(&opts, id);
bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0);
@@ -453,7 +454,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
memcpy(buffer, out.buf, out.pos);
}
- printbuf_exit(&out);
return ret;
}
@@ -532,11 +532,11 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
kfree(buf);
if (ret < 0)
- goto err_class_exit;
+ goto err;
ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v);
if (ret < 0)
- goto err_class_exit;
+ goto err;
s.v = v + 1;
s.defined = true;
@@ -548,7 +548,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
* rename() also has to deal with keeping inherited options up
* to date - see bch2_reinherit_attrs()
*/
- spin_lock(&dentry->d_lock);
+ guard(spinlock)(&dentry->d_lock);
if (!IS_ROOT(dentry)) {
struct bch_inode_info *dir =
to_bch_ei(d_inode(dentry->d_parent));
@@ -557,26 +557,24 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
} else {
s.v = 0;
}
- spin_unlock(&dentry->d_lock);
s.defined = false;
}
- mutex_lock(&inode->ei_update_lock);
- if (inode_opt_id == Inode_opt_project) {
- /*
- * inode fields accessible via the xattr interface are stored
- * with a +1 bias, so that 0 means unset:
- */
- ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0);
- if (ret)
- goto err;
- }
+ scoped_guard(mutex, &inode->ei_update_lock) {
+ if (inode_opt_id == Inode_opt_project) {
+ /*
+ * inode fields accessible via the xattr interface are stored
+ * with a +1 bias, so that 0 means unset:
+ */
+ ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0);
+ if (ret)
+ goto err;
+ }
- ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
+ }
err:
- mutex_unlock(&inode->ei_update_lock);
-err_class_exit:
return bch2_err_class(ret);
}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 9de356bcb411..aa176cc9a324 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -83,6 +83,8 @@ enum btrfs_block_group_flags {
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
/* Does the block group need to be added to the free space tree? */
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+ /* Set after we add a new block group to the free space tree. */
+ BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
/* Indicate that the block group is placed on a sequential zone */
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
/*
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c7cc24a5dd5e..8c597fa60523 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
{
- WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
+ struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root);
+
+ if (WARN_ON(node))
+ refcount_dec(&node->refs);
}
static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1beb9458f622..0d6ad7512f21 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1835,6 +1835,8 @@ void btrfs_put_root(struct btrfs_root *root)
if (refcount_dec_and_test(&root->refs)) {
if (WARN_ON(!xa_empty(&root->inodes)))
xa_destroy(&root->inodes);
+ if (WARN_ON(!xa_empty(&root->delayed_nodes)))
+ xa_destroy(&root->delayed_nodes);
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
@@ -2156,8 +2158,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
found = true;
root = read_tree_root_path(tree_root, path, &key);
if (IS_ERR(root)) {
- if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
- ret = PTR_ERR(root);
+ ret = PTR_ERR(root);
break;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
@@ -4310,8 +4311,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*
* So wait for all ongoing ordered extents to complete and then run
* delayed iputs. This works because once we reach this point no one
- * can either create new ordered extents nor create delayed iputs
- * through some other means.
+ * can create new ordered extents, but delayed iputs can still be added
+ * by a reclaim worker (see comments further below).
*
* Also note that btrfs_wait_ordered_roots() is not safe here, because
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
@@ -4322,15 +4323,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_flush_workqueue(fs_info->endio_write_workers);
/* Ordered extents for free space inodes. */
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
+ /*
+ * Run delayed iputs in case an async reclaim worker is waiting for them
+ * to be run as mentioned above.
+ */
btrfs_run_delayed_iputs(fs_info);
- /* There should be no more workload to generate new delayed iputs. */
- set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
cancel_work_sync(&fs_info->em_shrinker_work);
+ /*
+ * Run delayed iputs again because an async reclaim worker may have
+ * added new ones if it was flushing delalloc:
+ *
+ * shrink_delalloc() -> btrfs_start_delalloc_roots() ->
+ * start_delalloc_inodes() -> btrfs_add_delayed_iput()
+ */
+ btrfs_run_delayed_iputs(fs_info);
+
+ /* There should be no more workload to generate new delayed iputs. */
+ set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
+
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 849199768664..1dc931c4937f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4312,7 +4312,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
spin_unlock(&eb->refs_lock);
continue;
}
- xa_unlock_irq(&fs_info->buffer_tree);
/*
* If tree ref isn't set then we know the ref on this eb is a
@@ -4329,6 +4328,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
* check the folio private at the end. And
* release_extent_buffer() will release the refs_lock.
*/
+ xa_unlock_irq(&fs_info->buffer_tree);
release_extent_buffer(eb);
xa_lock_irq(&fs_info->buffer_tree);
}
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 0c573d46639a..a83c268f7f87 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1115,11 +1115,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
if (ret < 0)
goto out_locked;
- ASSERT(ret == 0);
+ /*
+ * If ret is 1 (no key found), it means this is an empty block group,
+ * without any extents allocated from it and there's no block group
+ * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
+ * because we are using the block group tree feature, so block group
+ * items are stored in the block group tree. It also means there are no
+ * extents allocated for block groups with a start offset beyond this
+ * block group's end offset (this is the last, highest, block group).
+ */
+ if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE))
+ ASSERT(ret == 0);
start = block_group->start;
end = block_group->start + block_group->length;
- while (1) {
+ while (ret == 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
@@ -1149,8 +1159,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_next_item(extent_root, path);
if (ret < 0)
goto out_locked;
- if (ret)
- break;
}
if (start < end) {
ret = __add_to_free_space_tree(trans, block_group, path2,
@@ -1233,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
{
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
+ struct rb_node *node;
int nr;
int ret;
@@ -1261,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
+ node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *bg;
+
+ bg = rb_entry(node, struct btrfs_block_group, cache_node);
+ clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
+ node = rb_next(node);
+ cond_resched();
+ }
+
return 0;
}
@@ -1350,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
+
+ if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
+ &block_group->runtime_flags))
+ goto next;
+
ret = populate_free_space_tree(trans, block_group);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
+next:
if (btrfs_should_end_transaction(trans)) {
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(free_space_root, 1);
@@ -1382,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
+ /*
+ * While rebuilding the free space tree we may allocate new metadata
+ * block groups while modifying the free space tree.
+ *
+ * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
+ * can use multiple transactions, every time btrfs_end_transaction() is
+ * called at btrfs_rebuild_free_space_tree() we finish the creation of
+ * new block groups by calling btrfs_create_pending_block_groups(), and
+ * that in turn calls us, through add_block_group_free_space(), to add
+ * a free space info item and a free space extent item for the block
+ * group.
+ *
+ * Then later btrfs_rebuild_free_space_tree() may find such new block
+	 * groups and process them with populate_free_space_tree(), which can
+ * fail with EEXIST since there are already items for the block group in
+ * the free space tree. Notice that we say "may find" because a new
+ * block group may be added to the block groups rbtree in a node before
+ * or after the block group currently being processed by the rebuild
+ * process. So signal the rebuild process to skip such new block groups
+ * if it finds them.
+ */
+ set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
+
ret = add_new_free_space_info(trans, block_group, path);
if (ret)
return ret;
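
The comment above describes a handshake: add_block_group_free_space() sets
FREE_SPACE_ADDED once a block group's items are in the free space tree, and
the rebuild loop skips flagged groups rather than re-inserting their items
and hitting EEXIST. A toy model of that protocol, with every name invented
for illustration:

    #include <stdbool.h>
    #include <stdio.h>

    struct bg { int id; bool free_space_added; };

    static void add_free_space_items(struct bg *bg)
    {
            /* would insert the info + extent items; doing it twice => EEXIST */
            bg->free_space_added = true;
            printf("added items for bg %d\n", bg->id);
    }

    int main(void)
    {
            struct bg groups[3] = { { 1, false }, { 2, false }, { 3, false } };

            /* bg 2 was created mid-rebuild: its items were added on creation */
            add_free_space_items(&groups[1]);

            /* the rebuild walk */
            for (int i = 0; i < 3; i++) {
                    if (groups[i].free_space_added)
                            continue;       /* the "goto next" in the hunk above */
                    add_free_space_items(&groups[i]);
            }
            return 0;
    }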
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c0c778243bf1..fc66872b4c74 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4250,9 +4250,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index);
if (ret) {
- btrfs_info(fs_info,
- "failed to delete reference to %.*s, inode %llu parent %llu",
- name->len, name->name, ino, dir_ino);
+ btrfs_crit(fs_info,
+ "failed to delete reference to %.*s, root %llu inode %llu parent %llu",
+ name->len, name->name, btrfs_root_id(root), ino, dir_ino);
btrfs_abort_transaction(trans, ret);
goto err;
}
@@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
int ret = 0;
struct btrfs_trans_handle *trans;
- u64 last_unlink_trans;
struct fscrypt_name fname;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
@@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
goto out_notrans;
}
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to its
+ * parent directory. This is to prevent an unrecoverable log tree in the
+ * case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ *
+ * This is because we can't unlink other roots when replaying the dir
+ * deletes for directory foo.
+ */
+ if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
+ btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
+
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry);
goto out;
@@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (ret)
goto out;
- last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
-
/* now the directory is empty */
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
&fname.disk_name);
- if (!ret) {
+ if (!ret)
btrfs_i_size_write(BTRFS_I(inode), 0);
- /*
- * Propagate the last_unlink_trans value of the deleted dir to
- * its parent directory. This is to prevent an unrecoverable
- * log tree in the case we do something like this:
- * 1) create dir foo
- * 2) create snapshot under dir foo
- * 3) delete the snapshot
- * 4) rmdir foo
- * 5) mkdir foo
- * 6) fsync foo or some file inside foo
- */
- if (last_unlink_trans >= trans->transid)
- BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
- }
out:
btrfs_end_transaction(trans);
out_notrans:
@@ -8059,6 +8059,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
int ret;
int ret2;
bool need_abort = false;
+ bool logs_pinned = false;
struct fscrypt_name old_fname, new_fname;
struct fscrypt_str *old_name, *new_name;
@@ -8182,6 +8183,31 @@ static int btrfs_rename_exchange(struct inode *old_dir,
inode_inc_iversion(new_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID &&
+ new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * If we are renaming in the same directory (and it's not for
+ * root entries) pin the log early to prevent any concurrent
+ * task from logging the directory after we removed the old
+ * entries and before we add the new entries, otherwise that
+ * task can sync a log without any entry for the inodes we are
+	 * renaming, and therefore replaying that log, if a power failure
+ * happens after syncing the log, would result in deleting the
+ * inodes.
+ *
+ * If the rename affects two different directories, we want to
+	 * make sure that there's no log commit that contains
+ * updates for only one of the directories but not for the
+ * other.
+ *
+ * If we are renaming an entry for a root, we don't care about
+ * log updates since we called btrfs_set_log_full_commit().
+ */
+ btrfs_pin_log_trans(root);
+ btrfs_pin_log_trans(dest);
+ logs_pinned = true;
+ }
+
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
@@ -8253,30 +8279,23 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode)->dir_index = new_idx;
/*
- * Now pin the logs of the roots. We do it to ensure that no other task
- * can sync the logs while we are in progress with the rename, because
- * that could result in an inconsistency in case any of the inodes that
- * are part of this rename operation were logged before.
+ * Do the log updates for all inodes.
+ *
+ * If either entry is for a root we don't need to update the logs since
+ * we've called btrfs_set_log_full_commit() before.
*/
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
- btrfs_pin_log_trans(root);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
- btrfs_pin_log_trans(dest);
-
- /* Do the log updates for all inodes. */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ if (logs_pinned) {
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
old_rename_ctx.index, new_dentry->d_parent);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
new_rename_ctx.index, old_dentry->d_parent);
+ }
- /* Now unpin the logs. */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+out_fail:
+ if (logs_pinned) {
btrfs_end_log_trans(root);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_end_log_trans(dest);
-out_fail:
+ }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@@ -8326,6 +8345,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
struct fscrypt_name old_fname, new_fname;
+ bool logs_pinned = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -8460,6 +8480,29 @@ static int btrfs_rename(struct mnt_idmap *idmap,
inode_inc_iversion(old_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * If we are renaming in the same directory (and it's not a
+ * root entry) pin the log to prevent any concurrent task from
+ * logging the directory after we removed the old entry and
+ * before we add the new entry, otherwise that task can sync
+	 * a log without any entry for the inode we are renaming, and
+ * therefore replaying that log, if a power failure happens
+ * after syncing the log, would result in deleting the inode.
+ *
+ * If the rename affects two different directories, we want to
+	 * make sure that there's no log commit that contains
+ * updates for only one of the directories but not for the
+ * other.
+ *
+ * If we are renaming an entry for a root, we don't care about
+ * log updates since we called btrfs_set_log_full_commit().
+ */
+ btrfs_pin_log_trans(root);
+ btrfs_pin_log_trans(dest);
+ logs_pinned = true;
+ }
+
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
@@ -8524,7 +8567,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = index;
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ if (logs_pinned)
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
rename_ctx.index, new_dentry->d_parent);
@@ -8540,6 +8583,10 @@ static int btrfs_rename(struct mnt_idmap *idmap,
}
}
out_fail:
+ if (logs_pinned) {
+ btrfs_end_log_trans(root);
+ btrfs_end_log_trans(dest);
+ }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 913acef3f0a9..8a60983a697c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
goto out;
}
+ btrfs_record_new_subvolume(trans, BTRFS_I(dir));
+
ret = btrfs_create_new_inode(trans, &new_inode_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- btrfs_record_new_subvolume(trans, BTRFS_I(dir));
-
d_instantiate_new(dentry, new_inode_args.inode);
new_inode_args.inode = NULL;
@@ -3139,7 +3139,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
return -EPERM;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
- btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
+ btrfs_err(fs_info, "scrub: extent tree v2 not yet supported");
return -EINVAL;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ce36fafc771e..7cd5e76a783c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -557,7 +557,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
*/
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
btrfs_warn_in_rcu(fs_info,
-"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
+"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
@@ -571,7 +571,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
err:
btrfs_warn_in_rcu(fs_info,
- "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
+ "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
@@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
/* Super block error, no need to search extent tree. */
if (is_super) {
- btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
+ btrfs_warn_in_rcu(fs_info, "scrub: %s on device %s, physical %llu",
errstr, btrfs_dev_name(dev), physical);
return;
}
@@ -631,14 +631,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
&ref_level);
if (ret < 0) {
btrfs_warn(fs_info,
- "failed to resolve tree backref for logical %llu: %d",
- swarn.logical, ret);
+ "scrub: failed to resolve tree backref for logical %llu: %d",
+ swarn.logical, ret);
break;
}
if (ret > 0)
break;
btrfs_warn_in_rcu(fs_info,
-"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
+"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
errstr, swarn.logical, btrfs_dev_name(dev),
swarn.physical, (ref_level ? "node" : "leaf"),
ref_level, ref_root);
@@ -718,7 +718,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad bytenr, has %llu want %llu",
+ "scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_bytenr(header), logical);
return;
@@ -728,7 +728,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad fsid, has %pU want %pU",
+ "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU",
logical, stripe->mirror_num,
header->fsid, fs_info->fs_devices->fsid);
return;
@@ -738,7 +738,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
+ "scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
logical, stripe->mirror_num,
header->chunk_tree_uuid, fs_info->chunk_tree_uuid);
return;
@@ -760,7 +760,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
+"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
logical, stripe->mirror_num,
CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
@@ -771,7 +771,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad generation, has %llu want %llu",
+ "scrub: tree block %llu mirror %u has bad generation, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_generation(header),
stripe->sectors[sector_nr].generation);
@@ -814,7 +814,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
*/
if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) {
btrfs_warn_rl(fs_info,
- "tree block at %llu crosses stripe boundary %llu",
+ "scrub: tree block at %llu crosses stripe boundary %llu",
stripe->logical +
(sector_nr << fs_info->sectorsize_bits),
stripe->logical);
@@ -1046,12 +1046,12 @@ skip:
if (repaired) {
if (dev) {
btrfs_err_rl_in_rcu(fs_info,
- "fixed up error at logical %llu on dev %s physical %llu",
+ "scrub: fixed up error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
btrfs_err_rl_in_rcu(fs_info,
- "fixed up error at logical %llu on mirror %u",
+ "scrub: fixed up error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
continue;
@@ -1060,12 +1060,12 @@ skip:
/* The remaining are all for unrepaired. */
if (dev) {
btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (regular) error at logical %llu on dev %s physical %llu",
+"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (regular) error at logical %llu on mirror %u",
+ "scrub: unable to fixup (regular) error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
@@ -1593,8 +1593,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
physical,
sctx->write_pointer);
if (ret)
- btrfs_err(fs_info,
- "zoned: failed to recover write pointer");
+ btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer");
}
mutex_unlock(&sctx->wr_lock);
btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical);
@@ -1658,7 +1657,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
int ret;
if (unlikely(!extent_root || !csum_root)) {
- btrfs_err(fs_info, "no valid extent or csum root for scrub");
+ btrfs_err(fs_info, "scrub: no valid extent or csum root found");
return -EUCLEAN;
}
memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) *
@@ -1907,7 +1906,7 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
btrfs_err(fs_info,
- "stripe %llu has unrepaired metadata sector at %llu",
+ "scrub: stripe %llu has unrepaired metadata sector at logical %llu",
stripe->logical,
stripe->logical + (i << fs_info->sectorsize_bits));
return true;
@@ -2167,7 +2166,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bitmap_and(&error, &error, &has_extent, stripe->nr_sectors);
if (!bitmap_empty(&error, stripe->nr_sectors)) {
btrfs_err(fs_info,
-"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
+"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
full_stripe_start, i, stripe->nr_sectors,
&error);
ret = -EIO;
@@ -2789,14 +2788,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ro_set = 0;
} else if (ret == -ETXTBSY) {
btrfs_warn(fs_info,
- "skipping scrub of block group %llu due to active swapfile",
+ "scrub: skipping scrub of block group %llu due to active swapfile",
cache->start);
scrub_pause_off(fs_info);
ret = 0;
goto skip_unfreeze;
} else {
- btrfs_warn(fs_info,
- "failed setting block group ro: %d", ret);
+ btrfs_warn(fs_info, "scrub: failed setting block group ro: %d",
+ ret);
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
scrub_pause_off(fs_info);
@@ -2892,13 +2891,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev,
ret = btrfs_check_super_csum(fs_info, sb);
if (ret != 0) {
btrfs_err_rl(fs_info,
- "super block at physical %llu devid %llu has bad csum",
+ "scrub: super block at physical %llu devid %llu has bad csum",
physical, dev->devid);
return -EIO;
}
if (btrfs_super_generation(sb) != generation) {
btrfs_err_rl(fs_info,
-"super block at physical %llu devid %llu has bad generation %llu expect %llu",
+"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu",
physical, dev->devid,
btrfs_super_generation(sb), generation);
return -EUCLEAN;
@@ -3059,7 +3058,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_err_in_rcu(fs_info,
- "scrub on devid %llu: filesystem on %s is not writable",
+ "scrub: devid %llu: filesystem on %s is not writable",
devid, btrfs_dev_name(dev));
ret = -EROFS;
goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 97e933113b82..cea8a7e9d6d3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
unsigned int nofs_flag;
struct btrfs_inode *inode;
+ /* Only meant to be called for subvolume roots and not for log roots. */
+ ASSERT(is_fstree(btrfs_root_id(root)));
+
/*
* We're holding a transaction handle whether we are logging or
* replaying a log tree, so we must make sure NOFS semantics apply
@@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
return 0;
}
-/*
- * simple helper to read an inode off the disk from a given root
- * This can only be called for subvolume roots and not for the log
- */
-static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root,
- u64 objectid)
-{
- struct btrfs_inode *inode;
-
- inode = btrfs_iget_logging(objectid, root);
- if (IS_ERR(inode))
- return NULL;
- return inode;
-}
-
/* replays a single extent in 'eb' at 'slot' with 'key' into the
* subvolume 'root'. path is released on entry and should be released
* on exit.
@@ -668,15 +656,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
extent_end = ALIGN(start + size,
fs_info->sectorsize);
} else {
- ret = 0;
- goto out;
+ btrfs_err(fs_info,
+ "unexpected extent type=%d root=%llu inode=%llu offset=%llu",
+ found_type, btrfs_root_id(root), key->objectid, key->offset);
+ return -EUCLEAN;
}
- inode = read_one_inode(root, key->objectid);
- if (!inode) {
- ret = -EIO;
- goto out;
- }
+ inode = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
/*
* first check to see if we already have this extent in the
@@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -961,7 +950,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
ret = unlink_inode_for_log_replay(trans, dir, inode, &name);
out:
kfree(name.name);
- iput(&inode->vfs_inode);
+ if (inode)
+ iput(&inode->vfs_inode);
return ret;
}
@@ -1072,7 +1062,9 @@ again:
search_key.type = BTRFS_INODE_REF_KEY;
search_key.offset = parent_objectid;
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
- if (ret == 0) {
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
struct btrfs_inode_ref *victim_ref;
unsigned long ptr;
unsigned long ptr_end;
@@ -1145,13 +1137,13 @@ again:
struct fscrypt_str victim_name;
extref = (struct btrfs_inode_extref *)(base + cur_offset);
+ victim_name.len = btrfs_inode_extref_name_len(leaf, extref);
if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
goto next;
ret = read_alloc_one_name(leaf, &extref->name,
- btrfs_inode_extref_name_len(leaf, extref),
- &victim_name);
+ victim_name.len, &victim_name);
if (ret)
return ret;
@@ -1166,18 +1158,18 @@ again:
kfree(victim_name.name);
return ret;
} else if (!ret) {
- ret = -ENOENT;
- victim_parent = read_one_inode(root,
- parent_objectid);
- if (victim_parent) {
+ victim_parent = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(victim_parent)) {
+ ret = PTR_ERR(victim_parent);
+ } else {
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
ret = unlink_inode_for_log_replay(trans,
victim_parent,
inode, &victim_name);
+ iput(&victim_parent->vfs_inode);
}
- iput(&victim_parent->vfs_inode);
kfree(victim_name.name);
if (ret)
return ret;
@@ -1314,9 +1306,9 @@ again:
struct btrfs_inode *dir;
btrfs_release_path(path);
- dir = read_one_inode(root, parent_id);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_id, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
kfree(name.name);
goto out;
}
@@ -1388,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* copy the back ref in. The link count fixup code will take
* care of the rest
*/
- dir = read_one_inode(root, parent_objectid);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ dir = NULL;
goto out;
}
- inode = read_one_inode(root, inode_objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(inode_objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -1408,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* parent object can change from one array
* item to another.
*/
- if (!dir)
- dir = read_one_inode(root, parent_objectid);
if (!dir) {
- ret = -ENOENT;
- goto out;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ dir = NULL;
+ goto out;
+ }
}
} else {
ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
@@ -1681,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
btrfs_release_path(path);
- inode = read_one_inode(root, key.offset);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.offset, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
@@ -1719,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode;
struct inode *vfs_inode;
- inode = read_one_inode(root, objectid);
- if (!inode)
- return -EIO;
+ inode = btrfs_iget_logging(objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
vfs_inode = &inode->vfs_inode;
key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
@@ -1760,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir;
int ret;
- inode = read_one_inode(root, location->objectid);
- if (!inode)
- return -ENOENT;
+ inode = btrfs_iget_logging(location->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- dir = read_one_inode(root, dirid);
- if (!dir) {
+ dir = btrfs_iget_logging(dirid, root);
+ if (IS_ERR(dir)) {
iput(&inode->vfs_inode);
- return -EIO;
+ return PTR_ERR(dir);
}
ret = btrfs_add_link(trans, dir, inode, name, 1, index);
@@ -1844,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
bool update_size = true;
bool name_added = false;
- dir = read_one_inode(root, key->objectid);
- if (!dir)
- return -EIO;
+ dir = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
if (ret)
@@ -2146,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -2300,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
if (!log_path)
return -ENOMEM;
- dir = read_one_inode(root, dirid);
- /* it isn't an error if the inode isn't there, that can happen
- * because we replay the deletes before we copy in the inode item
- * from the log
+ dir = btrfs_iget_logging(dirid, root);
+ /*
+ * It isn't an error if the inode isn't there, that can happen because
+ * we replay the deletes before we copy in the inode item from the log.
*/
- if (!dir) {
+ if (IS_ERR(dir)) {
btrfs_free_path(log_path);
- return 0;
+ ret = PTR_ERR(dir);
+ if (ret == -ENOENT)
+ ret = 0;
+ return ret;
}
range_start = 0;
@@ -2466,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_inode *inode;
u64 from;
- inode = read_one_inode(root, key.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
from = ALIGN(i_size_read(&inode->vfs_inode),
@@ -7447,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
* full log sync.
* Also we don't need to worry with renames, since btrfs_rename() marks the log
* for full commit when renaming a subvolume.
+ *
+ * Must be called before creating the subvolume entry in its parent directory.
*/
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
struct btrfs_inode *dir)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 89835071cfea..f475b4b7c457 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3282,6 +3282,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
device->bytes_used - dev_extent_len);
atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
btrfs_clear_space_info_full(fs_info);
+
+ if (list_empty(&device->post_commit_list)) {
+ list_add_tail(&device->post_commit_list,
+ &trans->transaction->dev_update_list);
+ }
+
mutex_unlock(&fs_info->chunk_mutex);
}
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b5b0156d5b95..9430b34d3cbb 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1426,6 +1427,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
zone_info[1].physical);
return -EIO;
}
+
+ if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
+ zone_info[0].alloc_offset = last_alloc;
+
+ if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
+ zone_info[1].alloc_offset = last_alloc;
+
if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
btrfs_err(bg->fs_info,
"zoned: write pointer offset mismatch of zones in DUP profile");
@@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
int i;
@@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
for (i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ zone_info[i].alloc_offset = last_alloc;
+
if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
!btrfs_test_opt(fs_info, DEGRADED)) {
btrfs_err(fs_info,
@@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
+ u64 stripe_nr, full_stripe_nr;
+ u64 stripe_offset;
+ int stripe_index;
+
+ stripe_nr = div64_u64(last_alloc, map->stripe_size);
+ stripe_offset = stripe_nr * map->stripe_size;
+ full_stripe_nr = div_u64(stripe_nr, map->num_stripes);
+ div_u64_rem(stripe_nr, map->num_stripes, &stripe_index);
+
+ zone_info[i].alloc_offset =
+ full_stripe_nr * map->stripe_size;
+
+ if (stripe_index > i)
+ zone_info[i].alloc_offset += map->stripe_size;
+ else if (stripe_index == i)
+ zone_info[i].alloc_offset +=
+ (last_alloc - stripe_offset);
+ }
+
if (test_bit(0, active) != test_bit(i, active)) {
if (!btrfs_zone_activate(bg))
return -EIO;
@@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
if (test_bit(0, active) != test_bit(i, active)) {
@@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
+ u64 stripe_nr, full_stripe_nr;
+ u64 stripe_offset;
+ int stripe_index;
+
+ stripe_nr = div64_u64(last_alloc, map->stripe_size);
+ stripe_offset = stripe_nr * map->stripe_size;
+ full_stripe_nr = div_u64(stripe_nr,
+ map->num_stripes / map->sub_stripes);
+ div_u64_rem(stripe_nr,
+ (map->num_stripes / map->sub_stripes),
+ &stripe_index);
+
+ zone_info[i].alloc_offset =
+ full_stripe_nr * map->stripe_size;
+
+ if (stripe_index > (i / map->sub_stripes))
+ zone_info[i].alloc_offset += map->stripe_size;
+ else if (stripe_index == (i / map->sub_stripes))
+ zone_info[i].alloc_offset +=
+ (last_alloc - stripe_offset);
+ }
+
if ((i % map->sub_stripes) == 0) {
bg->zone_capacity += zone_info[i].capacity;
bg->alloc_offset += zone_info[i].alloc_offset;
@@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
break;
case BTRFS_BLOCK_GROUP_DUP:
- ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_dup(cache, map, zone_info, active,
+ last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID1C3:
case BTRFS_BLOCK_GROUP_RAID1C4:
- ret = btrfs_load_block_group_raid1(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid1(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID0:
- ret = btrfs_load_block_group_raid0(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid0(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID10:
- ret = btrfs_load_block_group_raid10(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid10(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID5:
case BTRFS_BLOCK_GROUP_RAID6:
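The RAID0 branch above reconstructs each device's write pointer from last_alloc, the block group's logical allocation size: full stripes are distributed round-robin, and only the stripe holding last_alloc receives the partial remainder. A minimal userspace sketch of the same arithmetic, with illustrative geometry (64 KiB stripes across two devices, 160 KiB allocated):

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors the RAID0 branch: derive one stripe's write pointer from
     * the logical allocation size of the block group. */
    static uint64_t raid0_alloc_offset(uint64_t last_alloc, uint64_t stripe_size,
                                       int num_stripes, int i)
    {
            uint64_t stripe_nr = last_alloc / stripe_size;
            uint64_t stripe_offset = stripe_nr * stripe_size;
            uint64_t full_stripe_nr = stripe_nr / num_stripes;
            int stripe_index = stripe_nr % num_stripes;
            uint64_t off = full_stripe_nr * stripe_size;

            if (stripe_index > i)
                    off += stripe_size;           /* this stripe already full */
            else if (stripe_index == i)
                    off += last_alloc - stripe_offset; /* partial remainder */
            return off;
    }

    int main(void)
    {
            /* Illustrative geometry: 2 stripes of 64 KiB, 160 KiB allocated. */
            uint64_t sz = 64 * 1024, last = 160 * 1024;

            for (int i = 0; i < 2; i++)   /* prints 98304 (96K) and 65536 (64K) */
                    printf("stripe %d: %llu\n", i,
                           (unsigned long long)raid0_alloc_offset(last, sz, 2, i));
            return 0;
    }

The RAID10 variant divides by num_stripes / sub_stripes instead, since mirrored sub-stripes share one logical stripe slot.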
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 6a329c329f43..16e4a6bd9b97 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -214,9 +214,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
/*
* bit 30: I/O error occurred on this folio
+ * bit 29: CPU has dirty data in D-cache (needs aliasing handling);
- * bit 0 - 29: remaining parts to complete this folio
+ * bit 0 - 28: remaining parts to complete this folio
*/
-#define EROFS_ONLINEFOLIO_EIO (1 << 30)
+#define EROFS_ONLINEFOLIO_EIO 30
+#define EROFS_ONLINEFOLIO_DIRTY 29
void erofs_onlinefolio_init(struct folio *folio)
{
@@ -233,19 +235,23 @@ void erofs_onlinefolio_split(struct folio *folio)
atomic_inc((atomic_t *)&folio->private);
}
-void erofs_onlinefolio_end(struct folio *folio, int err)
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
{
int orig, v;
do {
orig = atomic_read((atomic_t *)&folio->private);
- v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
+ DBG_BUGON(orig <= 0);
+ v = dirty << EROFS_ONLINEFOLIO_DIRTY;
+ v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO);
} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
- if (v & ~EROFS_ONLINEFOLIO_EIO)
+ if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1))
return;
folio->private = 0;
- folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
+ if (v & BIT(EROFS_ONLINEFOLIO_DIRTY))
+ flush_dcache_folio(folio);
+ folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
@@ -351,11 +357,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
static int erofs_read_folio(struct file *file, struct folio *folio)
{
+ trace_erofs_read_folio(folio, true);
+
return iomap_read_folio(folio, &erofs_iomap_ops);
}
static void erofs_readahead(struct readahead_control *rac)
{
+ trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
+ readahead_count(rac), true);
+
return iomap_readahead(rac, &erofs_iomap_ops);
}
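For orientation, folio->private in the erofs code above is one atomic word: bits 0-28 count the remaining parts, bit 29 is a sticky flag recording that some part was written via the CPU (so the D-cache may need flushing), and bit 30 latches an I/O error. A standalone sketch of that packing (illustrative only):

    #include <stdio.h>

    #define ONLINEFOLIO_EIO   30
    #define ONLINEFOLIO_DIRTY 29

    int main(void)
    {
            unsigned int v = 2;     /* two outstanding parts in bits 0-28 */

            /* part 1 completes; its data was written via the CPU (dirty=1) */
            v = (1u << ONLINEFOLIO_DIRTY) | (v - 1);
            /* part 2 completes clean (dirty=0); the sticky bit survives v-1 */
            v = (0u << ONLINEFOLIO_DIRTY) | (v - 1);

            if (!(v & ((1u << ONLINEFOLIO_DIRTY) - 1))) /* low 29 bits drained */
                    printf("complete: need dcache flush=%u, success=%u\n",
                           !!(v & (1u << ONLINEFOLIO_DIRTY)),
                           !(v & (1u << ONLINEFOLIO_EIO)));
            return 0;
    }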
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index bf62e2836b60..358061d7b660 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
cur = min(cur, rq->outputsize);
if (cur && rq->out[0]) {
kin = kmap_local_page(rq->in[nrpages_in - 1]);
- if (rq->out[0] == rq->in[nrpages_in - 1]) {
+ if (rq->out[0] == rq->in[nrpages_in - 1])
memmove(kin + rq->pageofs_out, kin + pi, cur);
- flush_dcache_page(rq->out[0]);
- } else {
+ else
memcpy_to_page(rq->out[0], rq->pageofs_out,
kin + pi, cur);
- }
kunmap_local(kin);
}
rq->outputsize -= cur;
@@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK;
DBG_BUGON(no >= nrpages_out);
cnt = min(insz - pi, PAGE_SIZE - po);
- if (rq->out[no] == rq->in[ni]) {
+ if (rq->out[no] == rq->in[ni])
memmove(kin + po,
kin + rq->pageofs_in + pi, cnt);
- flush_dcache_page(rq->out[no]);
- } else if (rq->out[no]) {
+ else if (rq->out[no])
memcpy_to_page(rq->out[no], po,
kin + rq->pageofs_in + pi, cnt);
- }
pi += cnt;
} while (pi < insz);
kunmap_local(kin);
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 2fae209d0274..3e4b38bec0aa 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -58,6 +58,11 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ break;
+ }
+
de = erofs_bread(&buf, dbstart, true);
if (IS_ERR(de)) {
erofs_err(sb, "failed to readdir of logical block %llu of nid %llu",
@@ -88,6 +93,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
break;
ctx->pos = dbstart + maxsize;
ofs = 0;
+ cond_resched();
}
erofs_put_metabuf(&buf);
if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) {
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 7d81f504bff0..91781718199e 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
} else {
bio_for_each_folio_all(fi, &rq->bio) {
DBG_BUGON(folio_test_uptodate(fi.folio));
- erofs_onlinefolio_end(fi.folio, ret);
+ erofs_onlinefolio_end(fi.folio, ret, false);
}
}
bio_uninit(&rq->bio);
@@ -47,6 +47,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
{
+ const struct cred *old_cred;
struct iov_iter iter;
int ret;
@@ -60,7 +61,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
rq->iocb.ki_flags = IOCB_DIRECT;
iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
rq->bio.bi_iter.bi_size);
+ old_cred = override_creds(rq->iocb.ki_filp->f_cred);
ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
+ revert_creds(old_cred);
if (ret != -EIOCBQUEUED)
erofs_fileio_ki_complete(&rq->iocb, ret);
}
@@ -93,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
struct erofs_map_blocks *map = &io->map;
unsigned int cur = 0, end = folio_size(folio), len, attached = 0;
loff_t pos = folio_pos(folio), ofs;
- struct iov_iter iter;
- struct bio_vec bv;
int err = 0;
erofs_onlinefolio_init(folio);
@@ -119,13 +120,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
err = PTR_ERR(src);
break;
}
- bvec_set_folio(&bv, folio, len, cur);
- iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len);
- if (copy_to_iter(src, len, &iter) != len) {
- erofs_put_metabuf(&buf);
- err = -EIO;
- break;
- }
+ memcpy_to_folio(folio, cur, src, len);
erofs_put_metabuf(&buf);
} else if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, cur + len);
@@ -159,7 +154,7 @@ io_retry:
}
cur += len;
}
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index a32c03a80c70..06b867d2fc3b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -315,10 +315,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as,
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED 0x0008
/* Located in the special packed inode */
-#define EROFS_MAP_FRAGMENT 0x0010
+#define __EROFS_MAP_FRAGMENT 0x0010
/* The extent refers to partial decompressed data */
#define EROFS_MAP_PARTIAL_REF 0x0020
+#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT)
+
struct erofs_map_blocks {
struct erofs_buf buf;
@@ -390,7 +392,7 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map);
void erofs_onlinefolio_init(struct folio *folio);
void erofs_onlinefolio_split(struct folio *folio);
-void erofs_onlinefolio_end(struct folio *folio, int err);
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index fe8071844724..e3f28a1bb945 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1034,7 +1034,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, end);
tight = false;
- } else if (map->m_flags & EROFS_MAP_FRAGMENT) {
+ } else if (map->m_flags & __EROFS_MAP_FRAGMENT) {
erofs_off_t fpos = offset + cur - map->m_la;
err = z_erofs_read_fragment(inode->i_sb, folio, cur,
@@ -1091,7 +1091,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
tight = (bs == PAGE_SIZE);
}
} while ((end = cur) > 0);
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
@@ -1196,7 +1196,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err)
cur += len;
}
kunmap_local(dst);
- erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
+ erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true);
list_del(p);
kfree(bvi);
}
@@ -1355,7 +1355,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
- erofs_onlinefolio_end(page_folio(page), err);
+ erofs_onlinefolio_end(page_folio(page), err, true);
continue;
}
if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) {
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 14ea47f954f5..f1a15ff22147 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -413,8 +413,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
!vi->z_tailextent_headlcn) {
map->m_la = 0;
map->m_llen = inode->i_size;
- map->m_flags = EROFS_MAP_MAPPED |
- EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
return 0;
}
initial_lcn = ofs >> lclusterbits;
@@ -489,7 +488,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
goto unmap_out;
}
} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
- map->m_flags |= EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
} else {
map->m_pa = erofs_pos(sb, m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -597,6 +596,10 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
if (la > map->m_la) {
r = mid;
+ if (la > lend) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
lend = la;
} else {
l = mid + 1;
@@ -613,7 +616,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
if (lstart < lend) {
map->m_la = lstart;
if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
- map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
vi->z_fragmentoff = map->m_plen;
if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
vi->z_fragmentoff |= map->m_pa << 32;
@@ -635,12 +638,6 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
}
}
map->m_llen = lend - map->m_la;
- if (!last && map->m_llen < sb->s_blocksize) {
- erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu",
- map->m_llen, map->m_la, vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
return 0;
}
@@ -799,7 +796,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->length = map.m_llen;
if (map.m_flags & EROFS_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
- iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
+ iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ?
IOMAP_NULL_ADDR : map.m_pa;
} else {
iomap->type = IOMAP_HOLE;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d4dbffdedd08..0fbf5dfedb24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -883,7 +883,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
kfree_rcu(epi, rcu);
percpu_counter_dec(&ep->user->epoll_watches);
- return ep_refcount_dec_and_test(ep);
+ return true;
}
/*
@@ -891,14 +891,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
*/
static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
{
- WARN_ON_ONCE(__ep_remove(ep, epi, false));
+ if (__ep_remove(ep, epi, false))
+ WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
}
static void ep_clear_and_put(struct eventpoll *ep)
{
struct rb_node *rbp, *next;
struct epitem *epi;
- bool dispose;
/* We need to release all tasks waiting for these file */
if (waitqueue_active(&ep->poll_wait))
@@ -931,10 +931,8 @@ static void ep_clear_and_put(struct eventpoll *ep)
cond_resched();
}
- dispose = ep_refcount_dec_and_test(ep);
mutex_unlock(&ep->mtx);
-
- if (dispose)
+ if (ep_refcount_dec_and_test(ep))
ep_free(ep);
}
@@ -1137,7 +1135,7 @@ again:
dispose = __ep_remove(ep, epi, true);
mutex_unlock(&ep->mtx);
- if (dispose)
+ if (dispose && ep_refcount_dec_and_test(ep))
ep_free(ep);
goto again;
}
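The eventpoll change above narrows __ep_remove()'s contract: it no longer performs the final ep_refcount_dec_and_test() itself but merely reports that the caller now owes one reference drop, keeping the free decision at a single call depth. A simplified userspace sketch of that ownership hand-off (names and refcounting are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct ep { int refs; };

    static bool dec_and_test(struct ep *ep)
    {
            return --ep->refs == 0;
    }

    /* Sketch of the new contract: removal never drops the ep reference
     * itself; it only tells the caller that one drop is now owed. */
    static bool remove_item(struct ep *ep)
    {
            /* ... unlink the epitem ... */
            return true;
    }

    int main(void)
    {
            struct ep ep = { .refs = 2 };   /* item ref + owner ref */

            if (remove_item(&ep) && dec_and_test(&ep))
                    printf("last ref: free ep\n");
            else
                    printf("ep still referenced, refs=%d\n", ep.refs);
            return 0;
    }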
diff --git a/fs/exec.c b/fs/exec.c
index 1f5fdd2e096e..ba400aafd640 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
bool path_noexec(const struct path *path)
{
+	/* If it's an anonymous inode, make sure we catch any shenanigans. */
+ VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) &&
+ !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC));
return (path->mnt->mnt_flags & MNT_NOEXEC) ||
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
@@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
if (IS_ERR(file))
return file;
+ if (path_noexec(&file->f_path))
+ return ERR_PTR(-EACCES);
+
/*
* In the past the regular type check was here. It moved to may_open() in
* 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
* an invariant that all non-regular files error out before we get here.
*/
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
- path_noexec(&file->f_path))
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)))
return ERR_PTR(-EACCES);
err = exe_file_deny_write_access(file);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6bd3de64f2a8..696131e655ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,6 +35,17 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
+static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
+{
+ loff_t old_size = i_size_read(inode);
+
+ if (old_size >= new_size)
+ return;
+
+	/* zero or drop pages only in the range [old_size, new_size] */
+ truncate_pagecache(inode, old_size);
+}
+
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
@@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
+ filemap_invalidate_unlock(inode->i_mapping);
+
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
+
folio_lock(folio);
if (unlikely(folio->mapping != inode->i_mapping ||
folio_pos(folio) > i_size_read(inode) ||
@@ -1109,6 +1125,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ if (attr->ia_size > old_size)
+ f2fs_zero_post_eof_page(inode, attr->ia_size);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@@ -1227,6 +1245,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1510,6 +1532,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
truncate_pagecache(inode, offset);
@@ -1631,6 +1655,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
+ filemap_invalidate_lock(mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1762,6 +1790,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
+
+ f2fs_zero_post_eof_page(inode, offset + len);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1819,6 +1849,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
f2fs_balance_fs(sbi, true);
pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
@@ -4860,6 +4894,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
err = file_modified(file);
if (err)
return err;
+
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
+ filemap_invalidate_unlock(inode->i_mapping);
return count;
}
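Each call site above follows the same shape: before any operation that grows i_size, drop cached pages past the old EOF under filemap_invalidate_lock() so stale bytes in the last partial page cannot surface inside the newly valid range. A hypothetical extending path, for shape only (pos and count are illustrative):

    /* Hypothetical extending write path, not from this patch: any cached
     * page straddling the old EOF may hold stale bytes past i_size; drop
     * it before i_size grows so reads fault in freshly zeroed data. */
    filemap_invalidate_lock(inode->i_mapping);
    f2fs_zero_post_eof_page(inode, pos + count);
    filemap_invalidate_unlock(inode->i_mapping);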
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1cb4cba7f961..bfe104db284e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2078,7 +2078,6 @@ write_node:
if (!__write_node_folio(folio, false, &submitted,
wbc, do_balance, io_type, NULL)) {
- folio_unlock(folio);
folio_batch_release(&fbatch);
ret = -EIO;
goto out;
diff --git a/fs/file.c b/fs/file.c
index 3a3146664cf3..b6db031545e6 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1198,8 +1198,12 @@ bool file_seek_cur_needs_f_lock(struct file *file)
if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared)
return false;
- VFS_WARN_ON_ONCE((file_count(file) > 1) &&
- !mutex_is_locked(&file->f_pos_lock));
+ /*
+ * Note that we are not guaranteed to be called after fdget_pos() on
+	 * this file object, in which case the caller is expected to provide the
+ * appropriate locking.
+ */
+
return true;
}
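Per the reworded comment, file_seek_cur_needs_f_lock() no longer asserts that f_pos_lock is held; a caller that did not arrive via fdget_pos() must take the lock itself. A hypothetical caller sketch, where do_seek_update() is an illustrative helper rather than a real API:

    /* Hypothetical caller, not from this patch: */
    if (file_seek_cur_needs_f_lock(file)) {
            mutex_lock(&file->f_pos_lock);
            do_seek_update(file);           /* illustrative helper */
            mutex_unlock(&file->f_pos_lock);
    } else {
            do_seek_update(file);
    }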
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f102afc03359..47006d0753f1 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
- unsigned int max_pages)
+ unsigned int max_folios)
{
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
@@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
int err = 0;
num = min(iov_iter_count(ii), fc->max_write);
- num = min(num, max_pages << PAGE_SHIFT);
ap->args.in_pages = true;
ap->descs[0].offset = offset;
- while (num) {
+ while (num && ap->num_folios < max_folios) {
size_t tmp;
struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index bfe8d8af46f3..9572bdef49ee 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -9,6 +9,7 @@
#include "fuse_i.h"
#include "dev_uring_i.h"
+#include <linux/dax.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
@@ -162,6 +163,9 @@ static void fuse_evict_inode(struct inode *inode)
/* Will write inode on close/munmap and in all other dirtiers */
WARN_ON(inode->i_state & I_DIRTY_INODE);
+ if (FUSE_IS_DAX(inode))
+ dax_break_layout_final(inode);
+
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
if (inode->i_sb->s_flags & SB_ACTIVE) {
diff --git a/fs/libfs.c b/fs/libfs.c
index 9ea0ecc325a8..6f487fc6be34 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1649,12 +1649,10 @@ struct inode *alloc_anon_inode(struct super_block *s)
*/
inode->i_state = I_DIRTY;
/*
- * Historically anonymous inodes didn't have a type at all and
- * userspace has come to rely on this. Internally they're just
- * regular files but S_IFREG is masked off when reporting
- * information to userspace.
+ * Historically anonymous inodes don't have a type at all and
+ * userspace has come to rely on this.
*/
- inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+ inode->i_mode = S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE | S_ANON_INODE;
diff --git a/fs/namei.c b/fs/namei.c
index 4bb889fc980b..c26a7ee42184 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2917,7 +2917,8 @@ static int lookup_one_common(struct mnt_idmap *idmap,
* @base: base directory to lookup from
*
* Look up a dentry by name in the dcache, returning NULL if it does not
- * currently exist. The function does not try to create a dentry.
+ * currently exist. The function does not try to create a dentry and, if
+ * one is found, it doesn't try to revalidate it.
*
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code. It does no permission checking.
@@ -2933,7 +2934,7 @@ struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base)
if (err)
return ERR_PTR(err);
- return lookup_dcache(name, base, 0);
+ return d_lookup(base, name);
}
EXPORT_SYMBOL(try_lookup_noperm);
@@ -3057,14 +3058,22 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked);
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code. It does no permission checking.
*
- * Unlike lookup_noperm, it should be called without the parent
+ * Unlike lookup_noperm(), it should be called without the parent
* i_rwsem held, and will take the i_rwsem itself if necessary.
+ *
+ * Unlike try_lookup_noperm() it *does* revalidate the dentry if it already
+ * existed.
*/
struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base)
{
struct dentry *ret;
+ int err;
- ret = try_lookup_noperm(name, base);
+ err = lookup_noperm_common(name, base);
+ if (err)
+ return ERR_PTR(err);
+
+ ret = lookup_dcache(name, base, 0);
if (!ret)
ret = lookup_slow(name, base, 0);
return ret;
@@ -3471,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
return -EACCES;
break;
default:
- VFS_BUG_ON_INODE(1, inode);
+ VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode);
}
error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
diff --git a/fs/namespace.c b/fs/namespace.c
index e13d9ab4f564..54c59e091919 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2310,21 +2310,62 @@ out:
return dst_mnt;
}
-/* Caller should check returned pointer for errors */
+static inline bool extend_array(struct path **res, struct path **to_free,
+ unsigned n, unsigned *count, unsigned new_count)
+{
+ struct path *p;
-struct vfsmount *collect_mounts(const struct path *path)
+ if (likely(n < *count))
+ return true;
+ p = kmalloc_array(new_count, sizeof(struct path), GFP_KERNEL);
+ if (p && *count)
+ memcpy(p, *res, *count * sizeof(struct path));
+ *count = new_count;
+ kfree(*to_free);
+ *to_free = *res = p;
+ return p;
+}
+
+struct path *collect_paths(const struct path *path,
+ struct path *prealloc, unsigned count)
{
- struct mount *tree;
- namespace_lock();
- if (!check_mnt(real_mount(path->mnt)))
- tree = ERR_PTR(-EINVAL);
- else
- tree = copy_tree(real_mount(path->mnt), path->dentry,
- CL_COPY_ALL | CL_PRIVATE);
- namespace_unlock();
- if (IS_ERR(tree))
- return ERR_CAST(tree);
- return &tree->mnt;
+ struct mount *root = real_mount(path->mnt);
+ struct mount *child;
+ struct path *res = prealloc, *to_free = NULL;
+ unsigned n = 0;
+
+ guard(rwsem_read)(&namespace_sem);
+
+ if (!check_mnt(root))
+ return ERR_PTR(-EINVAL);
+ if (!extend_array(&res, &to_free, 0, &count, 32))
+ return ERR_PTR(-ENOMEM);
+ res[n++] = *path;
+ list_for_each_entry(child, &root->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, path->dentry))
+ continue;
+ for (struct mount *m = child; m; m = next_mnt(m, child)) {
+ if (!extend_array(&res, &to_free, n, &count, 2 * count))
+ return ERR_PTR(-ENOMEM);
+ res[n].mnt = &m->mnt;
+ res[n].dentry = m->mnt.mnt_root;
+ n++;
+ }
+ }
+ if (!extend_array(&res, &to_free, n, &count, count + 1))
+ return ERR_PTR(-ENOMEM);
+ memset(res + n, 0, (count - n) * sizeof(struct path));
+ for (struct path *p = res; p->mnt; p++)
+ path_get(p);
+ return res;
+}
+
+void drop_collected_paths(struct path *paths, struct path *prealloc)
+{
+ for (struct path *p = paths; p->mnt; p++)
+ path_put(p);
+ if (paths != prealloc)
+ kfree(paths);
}
static void free_mnt_ns(struct mnt_namespace *);
@@ -2401,15 +2442,6 @@ void dissolve_on_fput(struct vfsmount *mnt)
free_mnt_ns(ns);
}
-void drop_collected_mounts(struct vfsmount *mnt)
-{
- namespace_lock();
- lock_mount_hash();
- umount_tree(real_mount(mnt), 0);
- unlock_mount_hash();
- namespace_unlock();
-}
-
static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
@@ -2511,21 +2543,6 @@ struct vfsmount *clone_private_mount(const struct path *path)
}
EXPORT_SYMBOL_GPL(clone_private_mount);
-int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
- struct vfsmount *root)
-{
- struct mount *mnt;
- int res = f(root, arg);
- if (res)
- return res;
- list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
- res = f(&mnt->mnt, arg);
- if (res)
- return res;
- }
- return 0;
-}
-
static void lock_mnt_tree(struct mount *mnt)
{
struct mount *p;
@@ -2751,14 +2768,14 @@ static int attach_recursive_mnt(struct mount *source_mnt,
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
struct mount *q;
hlist_del_init(&child->mnt_hash);
- q = __lookup_mnt(&child->mnt_parent->mnt,
- child->mnt_mountpoint);
- if (q)
- mnt_change_mountpoint(child, smp, q);
/* Notice when we are propagating across user namespaces */
if (child->mnt_parent->mnt_ns->user_ns != user_ns)
lock_mnt_tree(child);
child->mnt.mnt_flags &= ~MNT_LOCKED;
+ q = __lookup_mnt(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ if (q)
+ mnt_change_mountpoint(child, smp, q);
commit_tree(child);
}
put_mountpoint(smp);
@@ -5290,16 +5307,12 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
kattr.kflags |= MOUNT_KATTR_RECURSE;
ret = wants_mount_setattr(uattr, usize, &kattr);
- if (ret < 0)
- return ret;
-
- if (ret) {
+ if (ret > 0) {
ret = do_mount_setattr(&file->f_path, &kattr);
- if (ret)
- return ret;
-
finish_mount_kattr(&kattr);
}
+ if (ret)
+ return ret;
}
fd = get_unused_fd_flags(flags & O_CLOEXEC);
@@ -6262,7 +6275,11 @@ void put_mnt_ns(struct mnt_namespace *ns)
{
if (!refcount_dec_and_test(&ns->ns.count))
return;
- drop_collected_mounts(&ns->root->mnt);
+ namespace_lock();
+ lock_mount_hash();
+ umount_tree(ns->root, 0);
+ unlock_mount_hash();
+ namespace_unlock();
free_mnt_ns(ns);
}
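The replacement API above returns a path_get()-referenced array terminated by a NULL ->mnt, reusing the caller's preallocated buffer when it is large enough; drop_collected_paths() releases the references and frees the array only if it was heap-allocated. A hypothetical caller sketch (file and visit() are illustrative):

    /* Hypothetical caller, not from this patch: */
    struct path prealloc[8];
    struct path *paths = collect_paths(&file->f_path, prealloc,
                                       ARRAY_SIZE(prealloc));
    if (IS_ERR(paths))
            return PTR_ERR(paths);
    for (struct path *p = paths; p->mnt; p++)
            visit(p);                       /* illustrative per-mount work */
    drop_collected_paths(paths, prealloc);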
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 72a3e6db2524..f27ea5099a68 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
* data written into the pagecache until we can find out from the server what
* the values actually are.
*/
-static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
- loff_t i_size, loff_t pos, size_t copied)
+void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
+ loff_t pos, size_t copied)
{
+ loff_t i_size, end = pos + copied;
blkcnt_t add;
size_t gap;
+ if (end <= i_size_read(inode))
+ return;
+
if (ctx->ops->update_i_size) {
- ctx->ops->update_i_size(inode, pos);
+ ctx->ops->update_i_size(inode, end);
return;
}
- i_size_write(inode, pos);
+ spin_lock(&inode->i_lock);
+
+ i_size = i_size_read(inode);
+ if (end > i_size) {
+ i_size_write(inode, end);
#if IS_ENABLED(CONFIG_FSCACHE)
- fscache_update_cookie(ctx->cache, NULL, &pos);
+ fscache_update_cookie(ctx->cache, NULL, &end);
#endif
- gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
- if (copied > gap) {
- add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
+ gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
+ if (copied > gap) {
+ add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
- inode->i_blocks = min_t(blkcnt_t,
- DIV_ROUND_UP(pos, SECTOR_SIZE),
- inode->i_blocks + add);
+ inode->i_blocks = min_t(blkcnt_t,
+ DIV_ROUND_UP(end, SECTOR_SIZE),
+ inode->i_blocks + add);
+ }
}
+ spin_unlock(&inode->i_lock);
}
/**
@@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
struct folio *folio = NULL, *writethrough = NULL;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
- loff_t i_size, pos = iocb->ki_pos;
+ loff_t pos = iocb->ki_pos;
size_t max_chunk = mapping_max_folio_size(mapping);
bool maybe_trouble = false;
@@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
flush_dcache_folio(folio);
/* Update the inode size if we moved the EOF marker */
+ netfs_update_i_size(ctx, inode, pos, copied);
pos += copied;
- i_size = i_size_read(inode);
- if (pos > i_size)
- netfs_update_i_size(ctx, inode, i_size, pos, copied);
written += copied;
if (likely(!wreq)) {
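netfs_update_i_size() above charges i_blocks only for whole 512-byte sectors beyond the already-charged tail: gap is what remains of the last sector at the old i_size, and the result is capped at DIV_ROUND_UP(end, SECTOR_SIZE). A userspace sketch of the arithmetic, assuming an append at EOF with no holes (values illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define SECTOR_SIZE 512u
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            uint64_t i_size = 1000, copied = 200, end = i_size + copied;
            uint64_t blocks = DIV_ROUND_UP(i_size, SECTOR_SIZE);       /* 2 */
            uint64_t gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); /* 24 */

            if (copied > gap) {
                    uint64_t add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); /* 1 */
                    uint64_t cap = DIV_ROUND_UP(end, SECTOR_SIZE);          /* 3 */

                    blocks = blocks + add < cap ? blocks + add : cap;
            }
            printf("i_blocks: %llu\n", (unsigned long long)blocks);    /* 3 */
            return 0;
    }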
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index fa9a5bf3c6d5..a16660ab7f83 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -9,20 +9,6 @@
#include <linux/uio.h>
#include "internal.h"
-static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
-{
- struct inode *inode = wreq->inode;
- unsigned long long end = wreq->start + wreq->transferred;
-
- if (!wreq->error &&
- i_size_read(inode) < end) {
- if (wreq->netfs_ops->update_i_size)
- wreq->netfs_ops->update_i_size(inode, end);
- else
- i_size_write(inode, end);
- }
-}
-
/*
* Perform an unbuffered write where we may have to do an RMW operation on an
* encrypted file. This can also be used for direct I/O writes.
@@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
if (async)
wreq->iocb = iocb;
wreq->len = iov_iter_count(&wreq->buffer.iter);
- wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
_debug("begin = %zd", ret);
@@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
}
if (!async) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
ret = netfs_wait_for_write(wreq);
if (ret > 0)
iocb->ki_pos += ret;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index e2ee9183392b..d4f16fefd965 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -28,6 +28,12 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
/*
+ * buffered_write.c
+ */
+void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
+ loff_t pos, size_t copied);
+
+/*
* main.c
*/
extern unsigned int netfs_debug;
@@ -267,14 +273,32 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq,
enum netfs_rreq_trace trace)
{
if (test_bit(rreq_flag, &rreq->flags)) {
- trace_netfs_rreq(rreq, trace);
clear_bit_unlock(rreq_flag, &rreq->flags);
smp_mb__after_atomic(); /* Set flag before task state */
+ trace_netfs_rreq(rreq, trace);
wake_up(&rreq->waitq);
}
}
/*
+ * Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier.
+ */
+static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq)
+{
+ /* Order read of flags before read of anything else, such as error. */
+ return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+}
+
+/*
+ * Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier.
+ */
+static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq)
+{
+ /* Order read of flags before read of anything else, such as error. */
+ return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+}
+
+/*
* fscache-cache.c
*/
#ifdef CONFIG_PROC_FS
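The two helpers above pair test_bit_acquire() with the clear_bit_unlock() in netfs_wake_rreq_flag(): the completing side publishes rreq->error before releasing IN_PROGRESS, so a waiter that observes the flag clear is guaranteed to see the final error. A minimal userspace model of that release/acquire pairing using C11 atomics (illustrative, not kernel code):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int in_progress = 1;
    static int error;               /* plain data published under the flag */

    static void complete(int err)
    {
            error = err;                            /* write the data... */
            atomic_store_explicit(&in_progress, 0,
                                  memory_order_release); /* ...then release */
    }

    static int wait_done(void)
    {
            while (atomic_load_explicit(&in_progress,
                                        memory_order_acquire))
                    ;       /* acquire pairs with the release above */
            return error;   /* guaranteed to see the completed value */
    }

    int main(void)
    {
            complete(-5);
            printf("error=%d\n", wait_done());
            return 0;
    }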
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 3db401d269e7..73da6c9f5777 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
if (v == &netfs_io_requests) {
seq_puts(m,
- "REQUEST OR REF FL ERR OPS COVERAGE\n"
- "======== == === == ==== === =========\n"
+ "REQUEST OR REF FLAG ERR OPS COVERAGE\n"
+ "======== == === ==== ==== === =========\n"
);
return 0;
}
rreq = list_entry(v, struct netfs_io_request, proc_link);
seq_printf(m,
- "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx",
+ "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx",
rreq->debug_id,
netfs_origins[rreq->origin],
refcount_read(&rreq->ref),
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 43b67a28a8fa..20748bcfbf59 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
DEFINE_WAIT(myself);
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ if (!netfs_check_subreq_in_progress(subreq))
continue;
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce);
for (;;) {
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ if (!netfs_check_subreq_in_progress(subreq))
break;
trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
}
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce);
finish_wait(&rreq->waitq, &myself);
}
@@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
static int netfs_collect_in_app(struct netfs_io_request *rreq,
bool (*collector)(struct netfs_io_request *rreq))
{
- bool need_collect = false, inactive = true;
+ bool need_collect = false, inactive = true, done = true;
+
+ if (!netfs_check_rreq_in_progress(rreq)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_recollect);
+ return 1; /* Done */
+ }
for (int i = 0; i < NR_IO_STREAMS; i++) {
struct netfs_io_subrequest *subreq;
@@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
struct netfs_io_subrequest,
rreq_link);
if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ (!netfs_check_subreq_in_progress(subreq) ||
test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
need_collect = true;
break;
}
+ if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
+ done = false;
}
- if (!need_collect && !inactive)
+ if (!need_collect && !inactive && !done)
return 0; /* Sleep */
__set_current_state(TASK_RUNNING);
@@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
/*
* Wait for a request to complete, successfully or otherwise.
*/
-static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
- bool (*collector)(struct netfs_io_request *rreq))
+static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
{
DEFINE_WAIT(myself);
ssize_t ret;
for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!netfs_check_rreq_in_progress(rreq))
+ break;
+ cond_resched();
continue;
}
}
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ if (!netfs_check_rreq_in_progress(rreq))
break;
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip);
finish_wait(&rreq->waitq, &myself);
ret = rreq->error;
@@ -478,12 +488,12 @@ all_collected:
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_read_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_read_collection);
}
ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_write_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_write_collection);
}
/*
@@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
{
DEFINE_WAIT(myself);
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
-
for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!netfs_check_rreq_in_progress(rreq) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+ cond_resched();
continue;
}
}
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ if (!netfs_check_rreq_in_progress(rreq) ||
!test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
break;
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause);
finish_wait(&rreq->waitq, &myself);
}
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 96ee18af28ef..3e804da1e1eb 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -218,7 +218,7 @@ reassess:
stream->collected_to = front->start;
}
- if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
+ if (netfs_check_subreq_in_progress(front))
notes |= HIT_PENDING;
smp_rmb(); /* Read counters after IN_PROGRESS flag. */
transferred = READ_ONCE(front->transferred);
@@ -293,7 +293,9 @@ reassess:
spin_lock(&rreq->lock);
remove = front;
- trace_netfs_sreq(front, netfs_sreq_trace_discard);
+ trace_netfs_sreq(front,
+ notes & ABANDON_SREQ ?
+ netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
@@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete)
+ if (rreq->iocb->ki_complete) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete)
+ if (rreq->iocb->ki_complete) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_read_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index e2b102ffb768..0f3a36852a4d 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -240,7 +240,7 @@ reassess_streams:
}
/* Stall if the front is still undergoing I/O. */
- if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
+ if (netfs_check_subreq_in_progress(front)) {
notes |= HIT_PENDING;
break;
}
@@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
ictx->ops->invalidate_cache(wreq);
}
- if (wreq->cleanup)
- wreq->cleanup(wreq);
+ if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
+ wreq->origin == NETFS_DIO_WRITE) &&
+ !wreq->error)
+ netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
if (wreq->origin == NETFS_DIO_WRITE &&
wreq->mapping->nrpages) {
@@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
wreq->iocb->ki_pos += written;
- if (wreq->iocb->ki_complete)
+ if (wreq->iocb->ki_complete) {
+ trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
wreq->iocb->ki_complete(
wreq->iocb, wreq->error ? wreq->error : written);
+ }
wreq->iocb = VFS_PTR_POISON;
}
@@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_write_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 9d1d8a8bab72..fc9c3e0d34d8 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
subreq = netfs_alloc_subrequest(wreq);
subreq->source = to->source;
subreq->start = start;
- subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
subreq->stream_nr = to->stream_nr;
subreq->retry_count = 1;
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_split);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index df4807460596..4bea008dbebd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- switch (task->tk_status) {
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
+ switch (op_status) {
+ case NFS4_OK:
+ case NFS4ERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFS4ERR_BADSESSION:
+ case NFS4ERR_BADSLOT:
+ case NFS4ERR_BAD_HIGH_SLOT:
+ case NFS4ERR_DEADSESSION:
+ case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case NFS4ERR_SEQ_FALSE_RETRY:
+ case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- break;
- case -NFS4ERR_DELAY:
+ goto out_retry;
+ case NFS4ERR_DELAY:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
fallthrough;
- case -NFS4ERR_GRACE:
+ case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
- break;
- case -NFS4ERR_RETRY_UNCACHED_REP:
- break;
+ goto out_retry;
+ case NFS4ERR_RETRY_UNCACHED_REP:
+ goto out_retry;
/* Invalidate Layout errors */
- case -NFS4ERR_PNFS_NO_LAYOUT:
- case -ESTALE: /* mapped NFS4ERR_STALE */
- case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
- case -EISDIR: /* mapped NFS4ERR_ISDIR */
- case -NFS4ERR_FHEXPIRED:
- case -NFS4ERR_WRONG_TYPE:
+ case NFS4ERR_PNFS_NO_LAYOUT:
+ case NFS4ERR_STALE:
+ case NFS4ERR_BADHANDLE:
+ case NFS4ERR_ISDIR:
+ case NFS4ERR_FHEXPIRED:
+ case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
+ default:
+ break;
+ }
+
+ switch (task->tk_status) {
/* RPC connection errors */
case -ENETDOWN:
case -ENETUNREACH:
@@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- fallthrough;
+ break;
default:
- if (ff_layout_avoid_mds_available_ds(lseg))
- return -NFS4ERR_RESET_TO_PNFS;
-reset:
- dprintk("%s Retry through MDS. Error %d\n", __func__,
- task->tk_status);
- return -NFS4ERR_RESET_TO_MDS;
+ break;
}
+
+ if (ff_layout_avoid_mds_available_ds(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
+ task->tk_status);
+ return -NFS4ERR_RESET_TO_MDS;
+
+out_retry:
task->tk_status = 0;
return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+ u32 op_status,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ switch (op_status) {
+ case NFS_OK:
+ case NFSERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFSERR_ACCES:
+ case NFSERR_BADHANDLE:
+ case NFSERR_FBIG:
+ case NFSERR_IO:
+ case NFSERR_NOSPC:
+ case NFSERR_ROFS:
+ case NFSERR_STALE:
+ goto out_reset_to_pnfs;
+ case NFSERR_JUKEBOX:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ goto out_retry;
+ default:
+ break;
+ }
+
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
+out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@@ -1228,6 +1272,7 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
- return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
- case 4:
- return ff_layout_async_handle_error_v4(task, state, clp,
+ return ff_layout_async_handle_error_v3(task, op_status, clp,
lseg, idx);
+ case 4:
+ return ff_layout_async_handle_error_v4(task, op_status, state,
+ clp, lseg, idx);
default:
/* should never happen */
WARN_ON_ONCE(1);
@@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
+ case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
trace_ff_layout_read_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
trace_ff_layout_write_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
trace_ff_layout_commit_error(data, task->tk_status);
}
- err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
- data->lseg, data->ds_commit_index);
+ err = ff_layout_async_handle_error(task, data->res.op_status,
+ NULL, data->ds_clp, data->lseg,
+ data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8ab7868807a7..a2fa6bc4d74e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
+ int err;
nfs_clients_init(net);
if (!rpc_proc_register(net, &nn->rpcstats)) {
- nfs_clients_exit(net);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_proc_rpc;
}
- return nfs_fs_proc_net_init(net);
+ err = nfs_fs_proc_net_init(net);
+ if (err)
+ goto err_proc_nfs;
+
+ return 0;
+
+err_proc_nfs:
+ rpc_proc_unregister(net, "nfs");
+err_proc_rpc:
+ nfs_clients_exit(net);
+ return err;
}
static void nfs_net_exit(struct net *net)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3adb7d0dbec7..1a7ec68bde15 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
if (atomic_dec_and_test(&lo->plh_outstanding) &&
- test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) {
+ smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
+ }
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ccb00aa93be0..e00b2aea8da2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1409,6 +1409,7 @@ void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
out:
if (!rcl->__nr_referring_calls) {
cb->cb_nr_referring_call_list--;
+ list_del(&rcl->__list);
kfree(rcl);
}
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3f3e9f6c4250..6a42cc7a845a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1611,7 +1611,7 @@ out_unlock:
*/
int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
{
- int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem;
+ int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem;
struct net *net = genl_info_net(info);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
const struct nlattr *attr;
@@ -1623,12 +1623,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
/* count number of SERVER_THREADS values */
nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
if (nla_type(attr) == NFSD_A_SERVER_THREADS)
- count++;
+ nrpools++;
}
mutex_lock(&nfsd_mutex);
- nrpools = max(count, nfsd_nrpools(net));
nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL);
if (!nthreads) {
ret = -ENOMEM;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 1fc68b59fa5d..76d6248b625e 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -1418,7 +1418,7 @@ out:
bool ovl_lower_positive(struct dentry *dentry)
{
struct ovl_entry *poe = OVL_E(dentry->d_parent);
- struct qstr *name = &dentry->d_name;
+ const struct qstr *name = &dentry->d_name;
const struct cred *old_cred;
unsigned int i;
bool positive = false;
@@ -1441,9 +1441,15 @@ bool ovl_lower_positive(struct dentry *dentry)
struct dentry *this;
struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
+ /*
+ * We need to make a non-const copy of dentry->d_name,
+	 * because lookup_one_positive_unlocked() will hash the name
+	 * with the parentpath base, which is on another (lower) fs.
+ */
this = lookup_one_positive_unlocked(
mnt_idmap(parentpath->layer->mnt),
- name, parentpath->dentry);
+ &QSTR_LEN(name->name, name->len),
+ parentpath->dentry);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
case -ENOENT:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 239a9c3642c0..55806bd36faa 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -246,9 +246,11 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs,
struct dentry *dentry,
umode_t mode)
{
- dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
- pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry));
- return dentry;
+ struct dentry *ret;
+
+ ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
+ pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret));
+ return ret;
}
static inline int ovl_do_mknod(struct ovl_fs *ofs,
diff --git a/fs/pidfs.c b/fs/pidfs.c
index c1f0a067be40..69919be1c9d8 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -366,7 +366,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
kinfo.pid = task_pid_vnr(task);
kinfo.mask |= PIDFD_INFO_PID;
- if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
+ if (kinfo.pid == 0 || kinfo.tgid == 0)
return -ESRCH;
copy_out:
diff --git a/fs/pnode.h b/fs/pnode.h
index 34b6247af01d..2d026fb98b18 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -28,8 +28,6 @@
#define CL_SHARED_TO_SLAVE 0x20
#define CL_COPY_MNT_NS_FILE 0x40
-#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
-
static inline void set_mnt_shared(struct mount *mnt)
{
mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a3eb3b740f76..3604b616311c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode)
head = ei->sysctl;
if (head) {
- RCU_INIT_POINTER(ei->sysctl, NULL);
+ WRITE_ONCE(ei->sysctl, NULL);
proc_sys_evict_inode(inode, head);
}
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index cc9d74a06ff0..08b78150cdde 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -918,17 +918,21 @@ static int proc_sys_compare(const struct dentry *dentry,
struct ctl_table_header *head;
struct inode *inode;
- /* Although proc doesn't have negative dentries, rcu-walk means
- * that inode here can be NULL */
- /* AV: can it, indeed? */
- inode = d_inode_rcu(dentry);
- if (!inode)
- return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- head = rcu_dereference(PROC_I(inode)->sysctl);
+
+ // false positive is fine here - we'll recheck anyway
+ if (d_in_lookup(dentry))
+ return 0;
+
+ inode = d_inode_rcu(dentry);
+ // we just might have run into dentry in the middle of __dentry_kill()
+ if (!inode)
+ return 1;
+
+ head = READ_ONCE(PROC_I(inode)->sysctl);
return !head || !sysctl_is_seen(head);
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 27972c0749e7..751479eb128f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long text, lib, swap, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
- anon = get_mm_counter(mm, MM_ANONPAGES);
- file = get_mm_counter(mm, MM_FILEPAGES);
- shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+ anon = get_mm_counter_sum(mm, MM_ANONPAGES);
+ file = get_mm_counter_sum(mm, MM_FILEPAGES);
+ shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES);
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
text = min(text, mm->exec_vm << PAGE_SHIFT);
lib = (mm->exec_vm << PAGE_SHIFT) - text;
- swap = get_mm_counter(mm, MM_SWAPENTS);
+ swap = get_mm_counter_sum(mm, MM_SWAPENTS);
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
@@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
+ *shared = get_mm_counter_sum(mm, MM_FILEPAGES) +
+ get_mm_counter_sum(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = mm->data_vm + mm->stack_vm;
- *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
+ *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES);
return mm->total_vm;
}
@@ -2182,7 +2182,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_FILE;
}
- if (is_zero_pfn(pmd_pfn(pmd)))
+ if (is_huge_zero_pmd(pmd))
categories |= PAGE_IS_PFNZERO;
if (pmd_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
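The get_mm_counter_sum() switch trades the cheap approximate read for an exact one. Assuming the mm RSS counters are ordinary percpu_counters (true on recent kernels), the difference boils down to:

	/* cheap: global count only, may lag per-CPU deltas by the batch size */
	static s64 rss_fast(struct percpu_counter *c)
	{
		return percpu_counter_read_positive(c);
	}

	/* exact but slower: folds in every CPU's pending delta */
	static s64 rss_exact(struct percpu_counter *c)
	{
		return percpu_counter_sum_positive(c);
	}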
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index 6ed2dfd4dbbd..d98e0d2de09f 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
struct rmid_read rr = {0};
struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
+ int domid, cpu, ret = 0;
struct rdt_resource *r;
+ struct cacheinfo *ci;
struct mon_data *md;
- int domid, ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
@@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
* one that matches this cache id.
*/
list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (d->ci->id == domid) {
- rr.ci = d->ci;
+ if (d->ci_id == domid) {
+ rr.ci_id = d->ci_id;
+ cpu = cpumask_any(&d->hdr.cpu_mask);
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci)
+ continue;
mon_event_read(&rr, r, NULL, rdtgrp,
- &d->ci->shared_cpu_map, evtid, false);
+ &ci->shared_cpu_map, evtid, false);
goto checkresult;
}
}
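With only the cache id stored, a live struct cacheinfo has to be resolved on demand from some CPU in the domain, and the lookup can now fail if that CPU went away. A condensed sketch of the lookup the hunk performs:

	static struct cacheinfo *domain_l3_ci(struct rdt_mon_domain *d)
	{
		int cpu = cpumask_any(&d->hdr.cpu_mask);

		/* may return NULL (e.g. CPU hotplug race); callers must check */
		return get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	}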
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 9a8cf6f11151..0a1eedba2b03 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -98,7 +98,7 @@ struct mon_data {
* domains in @r sharing L3 @ci.id
* @evtid: Which monitor event to read.
* @first: Initialize MBM counter when true.
- * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
* @err: Error encountered when reading counter.
* @val: Returned value of event counter. If @rgrp is a parent resource group,
* @val includes the sum of event counts from its child resource groups.
@@ -112,7 +112,7 @@ struct rmid_read {
struct rdt_mon_domain *d;
enum resctrl_event_id evtid;
bool first;
- struct cacheinfo *ci;
+ unsigned int ci_id;
int err;
u64 val;
void *arch_mon_ctx;
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index bde2801289d3..f5637855c3ac 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
int cpu = smp_processor_id();
struct rdt_mon_domain *d;
+ struct cacheinfo *ci;
struct mbm_state *m;
int err, ret;
u64 tval = 0;
@@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
}
/* Summing domains that share a cache, must be on a CPU for that cache. */
- if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci || ci->id != rr->ci_id)
return -EINVAL;
/*
@@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
ret = -EINVAL;
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
- if (d->ci->id != rr->ci->id)
+ if (d->ci_id != rr->ci_id)
continue;
err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
rr->evtid, &tval, rr->arch_mon_ctx);
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 1beb124e25f6..77d08229d855 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
char name[32];
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
if (snc_mode)
sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
@@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
return -EPERM;
list_for_each_entry(mevt, &r->evt_list, list) {
- domid = do_sum ? d->ci->id : d->hdr.id;
+ domid = do_sum ? d->ci_id : d->hdr.id;
priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
if (WARN_ON_ONCE(!priv))
return -EINVAL;
@@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
lockdep_assert_held(&rdtgroup_mutex);
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
kn = kernfs_find_and_get(parent_kn, name);
if (kn) {
/*
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 5200a0f3cafc..368e870624da 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -509,8 +509,17 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb)
spin_lock(&cfids->cfid_list_lock);
list_for_each_entry(cfid, &cfids->entries, entry) {
tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC);
- if (tmp_list == NULL)
- break;
+ if (tmp_list == NULL) {
+ /*
+ * If the kmalloc() fails, we won't drop all
+ * dentries, and unmounting is likely to trigger
+ * a 'Dentry still in use' error.
+ */
+ cifs_tcon_dbg(VFS, "Out of memory while dropping dentries\n");
+ spin_unlock(&cfids->cfid_list_lock);
+ spin_unlock(&cifs_sb->tlink_tree_lock);
+ goto done;
+ }
spin_lock(&cfid->fid_lock);
tmp_list->dentry = cfid->dentry;
cfid->dentry = NULL;
@@ -522,6 +531,7 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb)
}
spin_unlock(&cifs_sb->tlink_tree_lock);
+done:
list_for_each_entry_safe(tmp_list, q, &entry, entry) {
list_del(&tmp_list->entry);
dput(tmp_list->dentry);
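dput() can sleep, so the walk collects dentries on a private list under the spinlocks and drops them only after unlocking; on allocation failure it now bails out and still releases whatever was collected. The generic shape, with hypothetical types:

	struct drop_entry {
		struct list_head entry;
		struct dentry *dentry;
	};

	static void drop_collected(struct list_head *list)
	{
		struct drop_entry *e, *tmp;

		list_for_each_entry_safe(e, tmp, list, entry) {
			list_del(&e->entry);
			dput(e->dentry);	/* safe: no spinlocks held here */
			kfree(e);
		}
	}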
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index bc8a812ff95f..a28f7cae3caa 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -26,7 +26,7 @@ struct cached_dirents {
* open file instance.
*/
struct mutex de_mutex;
- int pos; /* Expected ctx->pos */
+ loff_t pos; /* Expected ctx->pos */
struct list_head entries;
};
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index c0196be0e65f..3fdf75737d43 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -1105,7 +1105,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
if ((count < 1) || (count > 11))
return -EINVAL;
- memset(flags_string, 0, 12);
+ memset(flags_string, 0, sizeof(flags_string));
if (copy_from_user(flags_string, buffer, count))
return -EFAULT;
diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h
index 26327442e383..b51ce64fcccf 100644
--- a/fs/smb/client/cifs_ioctl.h
+++ b/fs/smb/client/cifs_ioctl.h
@@ -61,7 +61,7 @@ struct smb_query_info {
struct smb3_key_debug_info {
__u64 Suid;
__u16 cipher_type;
- __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */
+ __u8 auth_key[SMB2_NTLMV2_SESSKEY_SIZE];
__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
} __packed;
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 45e94e18f4d5..89160bc34d35 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count)
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
+ struct list_head rlist; /* reconnect list */
spinlock_t srv_lock; /* protect anything here that is not protected */
__u64 conn_id; /* connection identifier (useful for debugging) */
int srv_count; /* reference counter */
@@ -776,6 +777,7 @@ struct TCP_Server_Info {
__le32 session_key_id; /* retrieved from negotiate response and send in session setup request */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
+ unsigned long neg_start; /* when negotiate started (jiffies) */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
#define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */
#define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */
@@ -1302,6 +1304,7 @@ struct cifs_tcon {
bool use_persistent:1; /* use persistent instead of durable handles */
bool no_lease:1; /* Do not request leases on files or directories */
bool use_witness:1; /* use witness protocol */
+ bool dummy:1; /* dummy tcon used for reconnecting channels */
__le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 66093fa78aed..045227ed4efc 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -136,6 +136,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *out_buf,
int *bytes_returned);
+void smb2_query_server_interfaces(struct work_struct *work);
void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
bool all_channels);
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 7216fcec79e8..75142f49d65d 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
+ goto do_retry;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
+do_retry:
+ __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
/* reset bytes number since we can not check a sign */
@@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid)
task_io_account_read(rdata->got_bytes);
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
+ case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
+ rdata->result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
+ break;
}
if (rdata->result == -ENODATA) {
@@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
+ result = -EAGAIN;
+ break;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
+ case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
+ result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index c4fb80b37738..205f547ca49e 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -97,7 +97,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
return rc;
}
-static void smb2_query_server_interfaces(struct work_struct *work)
+void smb2_query_server_interfaces(struct work_struct *work)
{
int rc;
int xid;
@@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
(SMB_INTERFACE_POLL_INTERVAL * HZ));
}
+#define set_need_reco(server) \
+do { \
+ spin_lock(&server->srv_lock); \
+ if (server->tcpStatus != CifsExiting) \
+ server->tcpStatus = CifsNeedReconnect; \
+ spin_unlock(&server->srv_lock); \
+} while (0)
+
/*
* Update the tcpStatus for the server.
* This is used to signal the cifsd thread to call cifs_reconnect
@@ -137,39 +145,45 @@ void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
bool all_channels)
{
- struct TCP_Server_Info *pserver;
+ struct TCP_Server_Info *nserver;
struct cifs_ses *ses;
+ LIST_HEAD(reco);
int i;
- /* If server is a channel, select the primary channel */
- pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
-
/* if we need to signal just this channel */
if (!all_channels) {
- spin_lock(&server->srv_lock);
- if (server->tcpStatus != CifsExiting)
- server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&server->srv_lock);
+ set_need_reco(server);
return;
}
- spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
- if (cifs_ses_exiting(ses))
- continue;
- spin_lock(&ses->chan_lock);
- for (i = 0; i < ses->chan_count; i++) {
- if (!ses->chans[i].server)
+ if (SERVER_IS_CHAN(server))
+ server = server->primary_server;
+ scoped_guard(spinlock, &cifs_tcp_ses_lock) {
+ set_need_reco(server);
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ spin_lock(&ses->ses_lock);
+ if (ses->ses_status == SES_EXITING) {
+ spin_unlock(&ses->ses_lock);
continue;
-
- spin_lock(&ses->chans[i].server->srv_lock);
- if (ses->chans[i].server->tcpStatus != CifsExiting)
- ses->chans[i].server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&ses->chans[i].server->srv_lock);
+ }
+ spin_lock(&ses->chan_lock);
+ for (i = 1; i < ses->chan_count; i++) {
+ nserver = ses->chans[i].server;
+ if (!nserver)
+ continue;
+ nserver->srv_count++;
+ list_add(&nserver->rlist, &reco);
+ }
+ spin_unlock(&ses->chan_lock);
+ spin_unlock(&ses->ses_lock);
}
- spin_unlock(&ses->chan_lock);
}
- spin_unlock(&cifs_tcp_ses_lock);
+
+ list_for_each_entry_safe(server, nserver, &reco, rlist) {
+ list_del_init(&server->rlist);
+ set_need_reco(server);
+ cifs_put_tcp_session(server, 0);
+ }
}
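The rewrite follows the usual "pin under the lock, act outside it" scheme: each secondary channel's server gains an extra srv_count reference while cifs_tcp_ses_lock is held, is parked on a local list, and is only marked for reconnect and released after the lock is dropped. A generic sketch with hypothetical names:

	struct obj {
		struct list_head entry, rlist;
		int refcount;
	};

	static void act_on_all(struct list_head *objects, spinlock_t *lock)
	{
		struct obj *o, *tmp;
		LIST_HEAD(todo);

		spin_lock(lock);
		list_for_each_entry(o, objects, entry) {
			o->refcount++;		/* pinned under the lock */
			list_add(&o->rlist, &todo);
		}
		spin_unlock(lock);

		list_for_each_entry_safe(o, tmp, &todo, rlist) {
			list_del_init(&o->rlist);
			act_on(o);		/* hypothetical; may block */
			put_obj(o);		/* hypothetical; drops the pin */
		}
	}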
/*
@@ -665,12 +679,12 @@ server_unresponsive(struct TCP_Server_Info *server)
/*
* If we're in the process of mounting a share or reconnecting a session
* and the server abruptly shut down (e.g. socket wasn't closed, packet
- * had been ACK'ed but no SMB response), don't wait longer than 20s to
- * negotiate protocol.
+ * had been ACK'ed but no SMB response), don't wait longer than 20s from
+ * when negotiate actually started.
*/
spin_lock(&server->srv_lock);
if (server->tcpStatus == CifsInNegotiate &&
- time_after(jiffies, server->lstrp + 20 * HZ)) {
+ time_after(jiffies, server->neg_start + 20 * HZ)) {
spin_unlock(&server->srv_lock);
cifs_reconnect(server, false);
return true;
@@ -2866,20 +2880,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
tcon->max_cached_dirs = ctx->max_cached_dirs;
tcon->nodelete = ctx->nodelete;
tcon->local_lease = ctx->local_lease;
- INIT_LIST_HEAD(&tcon->pending_opens);
tcon->status = TID_GOOD;
- INIT_DELAYED_WORK(&tcon->query_interfaces,
- smb2_query_server_interfaces);
if (ses->server->dialect >= SMB30_PROT_ID &&
(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
/* schedule query interfaces poll */
queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
(SMB_INTERFACE_POLL_INTERVAL * HZ));
}
-#ifdef CONFIG_CIFS_DFS_UPCALL
- INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh);
-#endif
spin_lock(&cifs_tcp_ses_lock);
list_add(&tcon->tcon_list, &ses->tcon_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -4199,7 +4207,9 @@ retry:
return 0;
}
+ server->lstrp = jiffies;
server->tcpStatus = CifsInNegotiate;
+ server->neg_start = jiffies;
spin_unlock(&server->srv_lock);
rc = server->ops->negotiate(xid, ses, server);
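lstrp is refreshed by any server response, so a trickle of traffic could postpone the old 20s watchdog indefinitely; neg_start is written once when negotiation begins and gives the check a fixed deadline. Condensed, the two sides of the fix are:

	/* when entering negotiation */
	server->neg_start = jiffies;		/* fixed reference point */

	/* in server_unresponsive() */
	if (server->tcpStatus == CifsInNegotiate &&
	    time_after(jiffies, server->neg_start + 20 * HZ))
		cifs_reconnect(server, false);	/* deadline cannot be pushed */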
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 9835672267d2..e9212da32f01 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -52,6 +52,7 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq)
struct netfs_io_stream *stream = &req->rreq.io_streams[subreq->stream_nr];
struct TCP_Server_Info *server;
struct cifsFileInfo *open_file = req->cfile;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(wdata->rreq->inode->i_sb);
size_t wsize = req->rreq.wsize;
int rc;
@@ -63,6 +64,10 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq)
server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
wdata->server = server;
+ if (cifs_sb->ctx->wsize == 0)
+ cifs_negotiate_wsize(server, cifs_sb->ctx,
+ tlink_tcon(req->cfile->tlink));
+
retry:
if (open_file->invalidHandle) {
rc = cifs_reopen_file(open_file, false);
@@ -160,10 +165,9 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq)
server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
rdata->server = server;
- if (cifs_sb->ctx->rsize == 0) {
+ if (cifs_sb->ctx->rsize == 0)
cifs_negotiate_rsize(server, cifs_sb->ctx,
tlink_tcon(req->cfile->tlink));
- }
rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
&size, &rdata->credits);
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index a634a34d4086..59ccc2229ab3 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -1824,10 +1824,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
cifs_errorf(fc, "symlinkroot mount options must be absolute path\n");
goto cifs_parse_mount_err;
}
- kfree(ctx->symlinkroot);
- ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->symlinkroot)
+ if (strnlen(param->string, PATH_MAX) == PATH_MAX) {
+ cifs_errorf(fc, "symlinkroot path too long (max path length: %u)\n",
+ PATH_MAX - 1);
goto cifs_parse_mount_err;
+ }
+ kfree(ctx->symlinkroot);
+ ctx->symlinkroot = param->string;
+ param->string = NULL;
break;
}
/* case Opt_ignore: - is ignored as expected ... */
@@ -1837,13 +1841,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
}
- /*
- * By default resolve all native absolute symlinks relative to "/mnt/".
- * Same default has drvfs driver running in WSL for resolving SMB shares.
- */
- if (!ctx->symlinkroot)
- ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL);
-
return 0;
cifs_parse_mount_err:
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index 56439da4f119..0a9935ce05a5 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -506,7 +506,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
le16_to_cpu(tcon->ses->server->cipher_type);
pkey_inf.Suid = tcon->ses->Suid;
memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response,
- 16 /* SMB2_NTLMV2_SESSKEY_SIZE */);
+ SMB2_NTLMV2_SESSKEY_SIZE);
memcpy(pkey_inf.smb3decryptionkey,
tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
memcpy(pkey_inf.smb3encryptionkey,
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index e77017f47084..da23cc12a52c 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -151,6 +151,12 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace)
#ifdef CONFIG_CIFS_DFS_UPCALL
INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
#endif
+ INIT_LIST_HEAD(&ret_buf->pending_opens);
+ INIT_DELAYED_WORK(&ret_buf->query_interfaces,
+ smb2_query_server_interfaces);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ INIT_DELAYED_WORK(&ret_buf->dfs_cache_work, dfs_cache_refresh);
+#endif
return ret_buf;
}
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index ba0193cf9033..4e5460206397 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -264,7 +264,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info,
/* The Mode field in the response can now include the file type as well */
fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode),
fattr->cf_cifsattrs & ATTR_DIRECTORY);
- fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode));
+ fattr->cf_dtype = S_DT(fattr->cf_mode);
switch (fattr->cf_mode & S_IFMT) {
case S_IFLNK:
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index bb25e77c5540..5fa29a97ac15 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -57,6 +57,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
struct reparse_symlink_data_buffer *buf = NULL;
struct cifs_open_info_data data = {};
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ const char *symroot = cifs_sb->ctx->symlinkroot;
struct inode *new;
struct kvec iov;
__le16 *path = NULL;
@@ -82,7 +83,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
.symlink_target = symlink_target,
};
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
+ symroot && symname[0] == '/') {
/*
* This is a request to create an absolute symlink on the server
* which does not support POSIX paths, and expects symlink in
@@ -92,7 +94,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
* ensure compatibility of this symlink stored in absolute form
* on the SMB server.
*/
- if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) {
+ if (!strstarts(symname, symroot)) {
/*
* If the absolute Linux symlink target path is not
* inside "symlinkroot" location then there is no way
@@ -101,12 +103,12 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
cifs_dbg(VFS,
"absolute symlink '%s' cannot be converted to NT format "
"because it is outside of symlinkroot='%s'\n",
- symname, cifs_sb->ctx->symlinkroot);
+ symname, symroot);
rc = -EINVAL;
goto out;
}
- len = strlen(cifs_sb->ctx->symlinkroot);
- if (cifs_sb->ctx->symlinkroot[len-1] != '/')
+ len = strlen(symroot);
+ if (symroot[len - 1] != '/')
len++;
if (symname[len] >= 'a' && symname[len] <= 'z' &&
(symname[len+1] == '/' || symname[len+1] == '\0')) {
@@ -782,6 +784,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
const char *full_path,
struct cifs_sb_info *cifs_sb)
{
+ const char *symroot = cifs_sb->ctx->symlinkroot;
char sep = CIFS_DIR_SEP(cifs_sb);
char *linux_target = NULL;
char *smb_target = NULL;
@@ -815,7 +818,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
goto out;
}
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
+ symroot && !relative) {
/*
* This is an absolute symlink from the server which does not
* support POSIX paths, so the symlink is in NT-style path.
@@ -875,15 +879,8 @@ globalroot:
abs_path += sizeof("\\DosDevices\\")-1;
else if (strstarts(abs_path, "\\GLOBAL??\\"))
abs_path += sizeof("\\GLOBAL??\\")-1;
- else {
- /* Unhandled absolute symlink, points outside of DOS/Win32 */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
- }
+ else
+ goto out_unhandled_target;
/* Sometimes path separator after \?? is double backslash */
if (abs_path[0] == '\\')
@@ -910,25 +907,19 @@ globalroot:
abs_path++;
abs_path[0] = drive_letter;
} else {
- /* Unhandled absolute symlink. Report an error. */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
+ goto out_unhandled_target;
}
abs_path_len = strlen(abs_path)+1;
- symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot);
- if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/')
+ symlinkroot_len = strlen(symroot);
+ if (symroot[symlinkroot_len - 1] == '/')
symlinkroot_len--;
linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL);
if (!linux_target) {
rc = -ENOMEM;
goto out;
}
- memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len);
+ memcpy(linux_target, symroot, symlinkroot_len);
linux_target[symlinkroot_len] = '/';
memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len);
} else if (smb_target[0] == sep && relative) {
@@ -966,6 +957,7 @@ globalroot:
* These paths have same format as Linux symlinks, so no
* conversion is needed.
*/
+out_unhandled_target:
linux_target = smb_target;
smb_target = NULL;
}
@@ -1172,7 +1164,6 @@ out:
if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK))
return false;
- fattr->cf_dtype = S_DT(fattr->cf_mode);
return true;
}
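Both conversion paths above now bail out early when symlinkroot is unset, and the NT-to-Linux direction glues symlinkroot and the converted path together with exactly one separator. A hypothetical helper equivalent to that join:

	static char *join_symlinkroot(const char *symroot, const char *abs_path)
	{
		size_t rlen = strlen(symroot);
		size_t plen = strlen(abs_path) + 1;	/* include NUL */
		char *out;

		if (symroot[rlen - 1] == '/')		/* avoid a double slash */
			rlen--;
		out = kmalloc(rlen + 1 + plen, GFP_KERNEL);
		if (!out)
			return NULL;
		memcpy(out, symroot, rlen);
		out[rlen] = '/';
		memcpy(out + rlen + 1, abs_path, plen);
		return out;
	}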
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index ec0db32c7d98..330bc3d25bad 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -498,8 +498,7 @@ cifs_ses_add_channel(struct cifs_ses *ses,
ctx->domainauto = ses->domainAuto;
ctx->domainname = ses->domainName;
- /* no hostname for extra channels */
- ctx->server_hostname = "";
+ ctx->server_hostname = ses->server->hostname;
ctx->username = ses->user_name;
ctx->password = ses->password;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index a717be1626a3..2df93a75e3b8 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -424,9 +424,9 @@ skip_sess_setup:
free_xid(xid);
ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES;
- /* regardless of rc value, setup polling */
- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
- (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ if (!tcon->ipc && !tcon->dummy)
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
mutex_unlock(&ses->session_mutex);
@@ -4229,10 +4229,8 @@ void smb2_reconnect_server(struct work_struct *work)
}
goto done;
}
-
tcon->status = TID_GOOD;
- tcon->retry = false;
- tcon->need_reconnect = false;
+ tcon->dummy = true;
/* now reconnect sessions for necessary channels */
list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
@@ -4567,7 +4565,11 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
+ goto do_retry;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
+do_retry:
__set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
@@ -4578,11 +4580,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(shdr->CreditRequest);
credits.instance = server->reconnect_instance;
- fallthrough;
+ rdata->result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
+ break;
}
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
@@ -4835,11 +4841,14 @@ smb2_writev_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
result = smb2_check_receive(mid, server, 0);
- if (result != 0)
+ if (result != 0) {
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad);
break;
+ }
written = le32_to_cpu(rsp->DataLength);
/*
@@ -4861,14 +4870,23 @@ smb2_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
+ result = -EAGAIN;
+ break;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
- fallthrough;
+ result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}
@@ -4908,7 +4926,6 @@ smb2_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
- trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
cifs_write_subrequest_terminated(wdata, result ?: written);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 5ae847919da5..754e94a0e07f 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -907,8 +907,10 @@ wait_send_queue:
.local_dma_lkey = sc->ib.pd->local_dma_lkey,
.direction = DMA_TO_DEVICE,
};
+ size_t payload_len = umin(*_remaining_data_length,
+ sp->max_send_size - sizeof(*packet));
- rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length,
+ rc = smb_extract_iter_to_rdma(iter, payload_len,
&extract);
if (rc < 0)
goto err_dma;
@@ -1013,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info)
return smbd_post_send_iter(info, NULL, &remaining_data_length);
}
+static int smbd_post_send_full_iter(struct smbd_connection *info,
+ struct iov_iter *iter,
+ int *_remaining_data_length)
+{
+ int rc = 0;
+
+ /*
+ * smbd_post_send_iter() respects the negotiated max_send_size,
+ * so we need to loop until the full iter is posted.
+ */
+
+ while (iov_iter_count(iter) > 0) {
+ rc = smbd_post_send_iter(info, iter, _remaining_data_length);
+ if (rc < 0)
+ break;
+ }
+
+ return rc;
+}
+
/*
* Post a receive request to the transport
* The remote peer can only send data when a receive request is posted
@@ -1452,6 +1475,9 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info)
char name[MAX_NAME_LEN];
int rc;
+ if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer)))
+ return -ENOMEM;
+
scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
info->request_cache =
kmem_cache_create(
@@ -1469,12 +1495,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info)
goto out1;
scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
+
+ struct kmem_cache_args response_args = {
+ .align = __alignof__(struct smbd_response),
+ .useroffset = (offsetof(struct smbd_response, packet) +
+ sizeof(struct smbdirect_data_transfer)),
+ .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer),
+ };
info->response_cache =
- kmem_cache_create(
- name,
- sizeof(struct smbd_response) +
- sp->max_recv_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ kmem_cache_create(name,
+ sizeof(struct smbd_response) + sp->max_recv_size,
+ &response_args, SLAB_HWCACHE_ALIGN);
if (!info->response_cache)
goto out2;
@@ -1747,35 +1778,39 @@ try_again:
}
/*
- * Receive data from receive reassembly queue
+ * Receive data from the transport's receive reassembly queue
* All the incoming data packets are placed in reassembly queue
- * buf: the buffer to read data into
+ * iter: the buffer to read data into
* size: the length of data to read
* return value: actual data read
- * Note: this implementation copies the data from reassebmly queue to receive
+ *
+ * Note: this implementation copies the data from reassembly queue to receive
* buffers used by upper layer. This is not the optimal code path. A better way
* to do it is to not have upper layer allocate its receive buffers but rather
* borrow the buffer from reassembly queue, and return it after data is
* consumed. But this will require more changes to upper layer code, and also
 * need to consider packet boundaries while they are still being reassembled.
*/
-static int smbd_recv_buf(struct smbd_connection *info, char *buf,
- unsigned int size)
+int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
{
struct smbdirect_socket *sc = &info->socket;
struct smbd_response *response;
struct smbdirect_data_transfer *data_transfer;
+ size_t size = iov_iter_count(&msg->msg_iter);
int to_copy, to_read, data_read, offset;
u32 data_length, remaining_data_length, data_offset;
int rc;
+ if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE))
+ return -EINVAL; /* It's a bug in upper layer to get there */
+
again:
/*
* No need to hold the reassembly queue lock all the time as we are
* the only one reading from the front of the queue. The transport
* may add more entries to the back of the queue at the same time
*/
- log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
+ log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size,
info->reassembly_data_length);
if (info->reassembly_data_length >= size) {
int queue_length;
@@ -1813,7 +1848,10 @@ again:
if (response->first_segment && size == 4) {
unsigned int rfc1002_len =
data_length + remaining_data_length;
- *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
+ __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len);
+ if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr),
+ &msg->msg_iter) != sizeof(rfc1002_hdr))
+ return -EFAULT;
data_read = 4;
response->first_segment = false;
log_read(INFO, "returning rfc1002 length %d\n",
@@ -1822,10 +1860,9 @@ again:
}
to_copy = min_t(int, data_length - offset, to_read);
- memcpy(
- buf + data_read,
- (char *)data_transfer + data_offset + offset,
- to_copy);
+ if (copy_to_iter((char *)data_transfer + data_offset + offset,
+ to_copy, &msg->msg_iter) != to_copy)
+ return -EFAULT;
/* move on to the next buffer? */
if (to_copy == data_length - offset) {
@@ -1891,90 +1928,6 @@ read_rfc1002_done:
}
/*
- * Receive a page from receive reassembly queue
- * page: the page to read data into
- * to_read: the length of data to read
- * return value: actual data read
- */
-static int smbd_recv_page(struct smbd_connection *info,
- struct page *page, unsigned int page_offset,
- unsigned int to_read)
-{
- struct smbdirect_socket *sc = &info->socket;
- int ret;
- char *to_address;
- void *page_address;
-
- /* make sure we have the page ready for read */
- ret = wait_event_interruptible(
- info->wait_reassembly_queue,
- info->reassembly_data_length >= to_read ||
- sc->status != SMBDIRECT_SOCKET_CONNECTED);
- if (ret)
- return ret;
-
- /* now we can read from reassembly queue and not sleep */
- page_address = kmap_atomic(page);
- to_address = (char *) page_address + page_offset;
-
- log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
- page, to_address, to_read);
-
- ret = smbd_recv_buf(info, to_address, to_read);
- kunmap_atomic(page_address);
-
- return ret;
-}
-
-/*
- * Receive data from transport
- * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC
- * return: total bytes read, or 0. SMB Direct will not do partial read.
- */
-int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
-{
- char *buf;
- struct page *page;
- unsigned int to_read, page_offset;
- int rc;
-
- if (iov_iter_rw(&msg->msg_iter) == WRITE) {
- /* It's a bug in upper layer to get there */
- cifs_dbg(VFS, "Invalid msg iter dir %u\n",
- iov_iter_rw(&msg->msg_iter));
- rc = -EINVAL;
- goto out;
- }
-
- switch (iov_iter_type(&msg->msg_iter)) {
- case ITER_KVEC:
- buf = msg->msg_iter.kvec->iov_base;
- to_read = msg->msg_iter.kvec->iov_len;
- rc = smbd_recv_buf(info, buf, to_read);
- break;
-
- case ITER_BVEC:
- page = msg->msg_iter.bvec->bv_page;
- page_offset = msg->msg_iter.bvec->bv_offset;
- to_read = msg->msg_iter.bvec->bv_len;
- rc = smbd_recv_page(info, page, page_offset, to_read);
- break;
-
- default:
- /* It's a bug in upper layer to get there */
- cifs_dbg(VFS, "Invalid msg type %d\n",
- iov_iter_type(&msg->msg_iter));
- rc = -EINVAL;
- }
-
-out:
- /* SMBDirect will read it all or nothing */
- if (rc > 0)
- msg->msg_iter.count = 0;
- return rc;
-}
-
-/*
* Send data to transport
* Each rqst is transported as a SMBDirect payload
* rqst: the data to write
@@ -2032,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server,
klen += rqst->rq_iov[i].iov_len;
iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen);
- rc = smbd_post_send_iter(info, &iter, &remaining_data_length);
+ rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length);
if (rc < 0)
break;
if (iov_iter_count(&rqst->rq_iter) > 0) {
/* And then the data pages if there are any */
- rc = smbd_post_send_iter(info, &rqst->rq_iter,
- &remaining_data_length);
+ rc = smbd_post_send_full_iter(info, &rqst->rq_iter,
+ &remaining_data_length);
if (rc < 0)
break;
}
@@ -2589,13 +2542,14 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter,
size_t fsize = folioq_folio_size(folioq, slot);
if (offset < fsize) {
- size_t part = umin(maxsize - ret, fsize - offset);
+ size_t part = umin(maxsize, fsize - offset);
if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part))
return -EIO;
offset += part;
ret += part;
+ maxsize -= part;
}
if (offset >= fsize) {
@@ -2610,7 +2564,7 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter,
slot = 0;
}
}
- } while (rdma->nr_sge < rdma->max_sge || maxsize > 0);
+ } while (rdma->nr_sge < rdma->max_sge && maxsize > 0);
iter->folioq = folioq;
iter->folioq_slot = slot;
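The folioq hunk fixes two coupled bugs: maxsize was never decremented as SGEs were added, and the loop condition used ||, so the extractor kept going while either bound remained. A reduced model of the corrected accounting (types and helpers are stand-ins):

	while (nr_sge < max_sge && maxsize > 0) {	/* && : stop on either bound */
		size_t part = umin(maxsize, chunk_left);

		add_sge(part);				/* hypothetical */
		maxsize -= part;			/* previously never decremented */
		chunk_left -= part;
	}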
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index 52bcb55d9952..93e5b2bb9f28 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(smb3_rw_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
__entry->rreq_debug_id, __entry->rreq_debug_index,
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->offset, __entry->len, __entry->rc)
@@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(smb3_other_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->offset, __entry->len, __entry->rc)
)
@@ -247,7 +247,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->target_fid,
__entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc)
)
@@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_done_class,
__entry->target_offset = target_offset;
__entry->len = len;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x",
__entry->xid, __entry->sesid, __entry->tid, __entry->target_fid,
__entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len)
)
@@ -482,7 +482,7 @@ DECLARE_EVENT_CLASS(smb3_fd_class,
__entry->tid = tid;
__entry->sesid = sesid;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid)
)
@@ -521,7 +521,7 @@ DECLARE_EVENT_CLASS(smb3_fd_err_class,
__entry->sesid = sesid;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->rc)
)
@@ -794,7 +794,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_err_class,
__entry->status = status;
__entry->rc = rc;
),
- TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
+ TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
__entry->sesid, __entry->tid, __entry->cmd, __entry->mid,
__entry->status, __entry->rc)
)
@@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_done_class,
__entry->cmd = cmd;
__entry->mid = mid;
),
- TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu",
+ TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu",
__entry->sesid, __entry->tid,
__entry->cmd, __entry->mid)
)
@@ -867,7 +867,7 @@ DECLARE_EVENT_CLASS(smb3_mid_class,
__entry->when_sent = when_sent;
__entry->when_received = when_received;
),
- TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
+ TP_printk("cmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
__entry->cmd, __entry->mid, __entry->pid, __entry->when_sent,
__entry->when_received)
)
@@ -898,7 +898,7 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class,
__assign_str(func_name);
__entry->rc = rc;
),
- TP_printk("\t%s: xid=%u rc=%d",
+ TP_printk("%s: xid=%u rc=%d",
__get_str(func_name), __entry->xid, __entry->rc)
)
@@ -924,7 +924,7 @@ DECLARE_EVENT_CLASS(smb3_sync_err_class,
__entry->ino = ino;
__entry->rc = rc;
),
- TP_printk("\tino=%lu rc=%d",
+ TP_printk("ino=%lu rc=%d",
__entry->ino, __entry->rc)
)
@@ -950,7 +950,7 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class,
__entry->xid = xid;
__assign_str(func_name);
),
- TP_printk("\t%s: xid=%u",
+ TP_printk("%s: xid=%u",
__get_str(func_name), __entry->xid)
)
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index 83764c230e9d..3f04a2977ba8 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -40,7 +40,7 @@ void ksmbd_conn_free(struct ksmbd_conn *conn)
kvfree(conn->request_buf);
kfree(conn->preauth_info);
if (atomic_dec_and_test(&conn->refcnt)) {
- ksmbd_free_transport(conn->transport);
+ conn->transport->ops->free_transport(conn->transport);
kfree(conn);
}
}
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 6efed923bd68..dd3e0e3f7bf0 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -133,6 +133,7 @@ struct ksmbd_transport_ops {
void *buf, unsigned int len,
struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len);
+ void (*free_transport)(struct ksmbd_transport *kt);
};
struct ksmbd_transport {
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 1a308171b599..63d17cea2e95 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmbd_work *work,
out_len = work->response_sz -
(le16_to_cpu(rsp->SecurityBufferOffset) + 4);
- /* Check previous session */
- prev_sess_id = le64_to_cpu(req->PreviousSessionId);
- if (prev_sess_id && prev_sess_id != sess->id)
- destroy_previous_session(conn, sess->user, prev_sess_id);
-
retval = ksmbd_krb5_authenticate(sess, in_blob, in_len,
out_blob, &out_len);
if (retval) {
ksmbd_debug(SMB, "krb5 authentication failed\n");
return -EINVAL;
}
+
+ /* Check previous session */
+ prev_sess_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_sess_id && prev_sess_id != sess->id)
+ destroy_previous_session(conn, sess->user, prev_sess_id);
+
rsp->SecurityBufferLength = cpu_to_le16(out_len);
if ((conn->sign || server_conf.enforced_signing) ||
@@ -4871,8 +4872,13 @@ static int get_file_standard_info(struct smb2_query_info_rsp *rsp,
sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
delete_pending = ksmbd_inode_pending_delete(fp);
- sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9);
- sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ if (ksmbd_stream_fd(fp) == false) {
+ sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ } else {
+ sinfo->AllocationSize = cpu_to_le64(fp->stream.size);
+ sinfo->EndOfFile = cpu_to_le64(fp->stream.size);
+ }
sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending);
sinfo->DeletePending = delete_pending;
sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0;
@@ -4935,9 +4941,14 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->ChangeTime = cpu_to_le64(time);
file_info->Attributes = fp->f_ci->m_fattr;
file_info->Pad1 = 0;
- file_info->AllocationSize =
- cpu_to_le64(stat.blocks << 9);
- file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ if (ksmbd_stream_fd(fp) == false) {
+ file_info->AllocationSize =
+ cpu_to_le64(stat.blocks << 9);
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ } else {
+ file_info->AllocationSize = cpu_to_le64(fp->stream.size);
+ file_info->EndOfFile = cpu_to_le64(fp->stream.size);
+ }
file_info->NumberOfLinks =
cpu_to_le32(get_nlink(&stat) - delete_pending);
file_info->DeletePending = delete_pending;
@@ -4946,7 +4957,10 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->IndexNumber = cpu_to_le64(stat.ino);
file_info->EASize = 0;
file_info->AccessFlags = fp->daccess;
- file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ if (ksmbd_stream_fd(fp) == false)
+ file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ else
+ file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos);
file_info->Mode = fp->coption;
file_info->AlignmentRequirement = 0;
conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename,
@@ -5134,8 +5148,13 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
time = ksmbd_UnixTimeToNT(stat.ctime);
file_info->ChangeTime = cpu_to_le64(time);
file_info->Attributes = fp->f_ci->m_fattr;
- file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
- file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ if (ksmbd_stream_fd(fp) == false) {
+ file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ } else {
+ file_info->AllocationSize = cpu_to_le64(fp->stream.size);
+ file_info->EndOfFile = cpu_to_le64(fp->stream.size);
+ }
file_info->Reserved = cpu_to_le32(0);
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_ntwrk_info));
@@ -5158,7 +5177,11 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp,
struct smb2_file_pos_info *file_info;
file_info = (struct smb2_file_pos_info *)rsp->Buffer;
- file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ if (ksmbd_stream_fd(fp) == false)
+ file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ else
+ file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos);
+
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_pos_info));
}
@@ -5247,8 +5270,13 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
file_info->ChangeTime = cpu_to_le64(time);
file_info->DosAttributes = fp->f_ci->m_fattr;
file_info->Inode = cpu_to_le64(stat.ino);
- file_info->EndOfFile = cpu_to_le64(stat.size);
- file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ if (ksmbd_stream_fd(fp) == false) {
+ file_info->EndOfFile = cpu_to_le64(stat.size);
+ file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ } else {
+ file_info->EndOfFile = cpu_to_le64(fp->stream.size);
+ file_info->AllocationSize = cpu_to_le64(fp->stream.size);
+ }
file_info->HardLinks = cpu_to_le32(stat.nlink);
file_info->Mode = cpu_to_le32(stat.mode & 0777);
switch (stat.mode & S_IFMT) {
@@ -6190,6 +6218,9 @@ static int set_file_allocation_info(struct ksmbd_work *work,
if (!(fp->daccess & FILE_WRITE_DATA_LE))
return -EACCES;
+ if (ksmbd_stream_fd(fp) == true)
+ return 0;
+
rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
AT_STATX_SYNC_AS_STAT);
if (rc)
@@ -6248,7 +6279,8 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
* truncate of some filesystem like FAT32 fill zero data in
* truncated range.
*/
- if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) {
+ if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC &&
+ ksmbd_stream_fd(fp) == false) {
ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize);
rc = ksmbd_vfs_truncate(work, fp, newsize);
if (rc) {
@@ -6321,7 +6353,13 @@ static int set_file_position_info(struct ksmbd_file *fp,
return -EINVAL;
}
- fp->filp->f_pos = current_byte_offset;
+ if (ksmbd_stream_fd(fp) == false)
+ fp->filp->f_pos = current_byte_offset;
+ else {
+ if (current_byte_offset > XATTR_SIZE_MAX)
+ current_byte_offset = XATTR_SIZE_MAX;
+ fp->stream.pos = current_byte_offset;
+ }
return 0;
}
@@ -8535,11 +8573,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
goto err_out;
}
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- opinfo_put(opinfo);
- ksmbd_fd_put(work, fp);
-
rsp->StructureSize = cpu_to_le16(24);
rsp->OplockLevel = rsp_oplevel;
rsp->Reserved = 0;
@@ -8547,16 +8580,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
rsp->VolatileFid = volatile_id;
rsp->PersistentFid = persistent_id;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
-
opinfo_put(opinfo);
ksmbd_fd_put(work, fp);
- smb2_set_err_rsp(work);
}
static int check_lease_state(struct lease *lease, __le32 req_state)
@@ -8686,11 +8718,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
}
lease_state = lease->state;
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- atomic_dec(&opinfo->breaking_cnt);
- wake_up_interruptible_all(&opinfo->oplock_brk);
- opinfo_put(opinfo);
rsp->StructureSize = cpu_to_le16(36);
rsp->Reserved = 0;
@@ -8699,16 +8726,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
rsp->LeaseState = lease_state;
rsp->LeaseDuration = 0;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
+ opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
atomic_dec(&opinfo->breaking_cnt);
wake_up_interruptible_all(&opinfo->oplock_brk);
-
opinfo_put(opinfo);
- smb2_set_err_rsp(work);
}
/**
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 4998df04ab95..c6cbe0d56e32 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -159,7 +159,8 @@ struct smb_direct_transport {
};
#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
-
+#define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \
+ struct smb_direct_transport, transport))
enum {
SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
SMB_DIRECT_MSG_DATA_TRANSFER
@@ -410,6 +411,11 @@ err:
return NULL;
}
+static void smb_direct_free_transport(struct ksmbd_transport *kt)
+{
+ kfree(SMBD_TRANS(kt));
+}
+
static void free_transport(struct smb_direct_transport *t)
{
struct smb_direct_recvmsg *recvmsg;
@@ -427,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
- ib_destroy_qp(t->qp);
+ t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
ksmbd_debug(RDMA, "drain the reassembly queue\n");
@@ -455,7 +462,6 @@ static void free_transport(struct smb_direct_transport *t)
smb_direct_destroy_pools(t);
ksmbd_conn_free(KSMBD_TRANS(t)->conn);
- kfree(t);
}
static struct smb_direct_sendmsg
@@ -1935,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
return 0;
err:
if (t->qp) {
- ib_destroy_qp(t->qp);
t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
if (t->recv_cq) {
ib_destroy_cq(t->recv_cq);
@@ -2281,4 +2287,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.read = smb_direct_read,
.rdma_read = smb_direct_rdma_read,
.rdma_write = smb_direct_rdma_write,
+ .free_transport = smb_direct_free_transport,
};
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index abedf510899a..4e9f98db9ff4 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -93,7 +93,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk)
return t;
}
-void ksmbd_free_transport(struct ksmbd_transport *kt)
+static void ksmbd_tcp_free_transport(struct ksmbd_transport *kt)
{
struct tcp_transport *t = TCP_TRANS(kt);
@@ -656,4 +656,5 @@ static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
.read = ksmbd_tcp_read,
.writev = ksmbd_tcp_writev,
.disconnect = ksmbd_tcp_disconnect,
+ .free_transport = ksmbd_tcp_free_transport,
};
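Freeing is now dispatched through the ops table, so the final refcount drop in ksmbd_conn_free() works for TCP and RDMA alike without knowing the concrete transport. The shape of the dispatch:

	/* each transport registers its own destructor ... */
	static const struct ksmbd_transport_ops some_transport_ops = {
		.free_transport	= some_free_transport,	/* hypothetical instance */
	};

	/* ... and generic teardown stays transport-agnostic */
	if (atomic_dec_and_test(&conn->refcnt)) {
		conn->transport->ops->free_transport(conn->transport);
		kfree(conn);
	}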
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index ba45e809555a..d3437f6644e3 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -293,6 +293,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos,
if (v_len - *pos < count)
count = v_len - *pos;
+ fp->stream.pos = v_len;
memcpy(buf, &stream_buf[*pos], count);
@@ -456,8 +457,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
true);
if (err < 0)
goto out;
-
- fp->filp->f_pos = *pos;
+ else
+ fp->stream.pos = size;
err = 0;
out:
kvfree(stream_buf);
@@ -1281,6 +1282,7 @@ out1:
err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
if (err) {
+ mnt_drop_write(parent_path->mnt);
path_put(path);
path_put(parent_path);
}
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index 5bbb179736c2..0708155b5caf 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -44,6 +44,7 @@ struct ksmbd_lock {
struct stream {
char *name;
ssize_t size;
+ loff_t pos;
};
struct ksmbd_inode {
diff --git a/fs/super.c b/fs/super.c
index 21799e213fd7..80418ca8e215 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -964,8 +964,10 @@ void iterate_supers_type(struct file_system_type *type,
spin_unlock(&sb_lock);
locked = super_lock_shared(sb);
- if (locked)
+ if (locked) {
f(sb, arg);
+ super_unlock_shared(sb);
+ }
spin_lock(&sb_lock);
if (p)
diff --git a/fs/xattr.c b/fs/xattr.c
index 8ec5b0204bfd..600ae97969cf 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1479,6 +1479,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
buffer += err;
}
remaining_size -= err;
+ err = 0;
read_lock(&xattrs->lock);
for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 7839efe050bf..000cc7f4a3ce 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3444,16 +3444,41 @@ xfs_alloc_read_agf(
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
}
+
#ifdef DEBUG
- else if (!xfs_is_shutdown(mp)) {
- ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
- ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
- ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
- ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
- ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level));
- ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level));
+ /*
+ * It's possible for the AGF to be out of sync if the block device is
+ * silently dropping writes. This can happen in fstests with dmflakey
+ * enabled, which allows the buffer to be cleaned and reclaimed by
+ * memory pressure and then re-read from disk here. We will get a
+ * stale version of the AGF from disk, and nothing good can happen from
+ * here. Hence if we detect this situation, immediately shut down the
+ * filesystem.
+ *
+ * This can also happen if we are already in the middle of a forced
+ * shutdown, so don't bother checking if we are already shut down.
+ */
+ if (!xfs_is_shutdown(pag_mount(pag))) {
+ bool ok = true;
+
+ ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks);
+ ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks);
+ ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount);
+ ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest);
+ ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level);
+ ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level);
+
+ if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
+ xfs_trans_brelse(tp, agfbp);
+ xfs_force_shutdown(pag_mount(pag),
+ SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+ }
}
-#endif
+#endif /* DEBUG */
+
if (agfbpp)
*agfbpp = agfbp;
else
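The ok &= accumulation keeps the comparison list branch-free and lets a single XFS_IS_CORRUPT() report cover every mismatched field. In miniature:

	bool ok = true;

	ok &= in_core_freeblks == be32_to_cpu(ondisk_freeblks);
	ok &= in_core_flcount == be32_to_cpu(ondisk_flcount);

	if (XFS_IS_CORRUPT(mp, !ok))		/* one report for any mismatch */
		return -EFSCORRUPTED;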
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0c47b5c6ca7d..750111634d9f 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi(
set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
}
+#ifdef DEBUG
/*
- * It's possible for these to be out of sync if
- * we are in the middle of a forced shutdown.
+ * It's possible for the AGI to be out of sync if the block device is
+ * silently dropping writes. This can happen in fstests with dmflakey
+ * enabled, which allows the buffer to be cleaned and reclaimed by
+ * memory pressure and then re-read from disk here. We will get a
+ * stale version of the AGI from disk, and nothing good can happen from
+ * here. Hence if we detect this situation, immediately shut down the
+ * filesystem.
+ *
+ * This can also happen if we are already in the middle of a forced
+ * shutdown, so don't bother checking if we are already shut down.
*/
- ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
- xfs_is_shutdown(pag_mount(pag)));
+ if (!xfs_is_shutdown(pag_mount(pag))) {
+ bool ok = true;
+
+ ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount);
+ ok &= pag->pagi_count == be32_to_cpu(agi->agi_count);
+
+ if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ xfs_trans_brelse(tp, agibp);
+ xfs_force_shutdown(pag_mount(pag),
+ SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+ }
+ }
+#endif /* DEBUG */
+
if (agibpp)
*agibpp = agibp;
else
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8af83bd161f9..ba5bd6031ece 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit(
return error;
}
-/*
- * Push a single buffer on a delwri queue.
- *
- * The purpose of this function is to submit a single buffer of a delwri queue
- * and return with the buffer still on the original queue.
- *
- * The buffer locking and queue management logic between _delwri_pushbuf() and
- * _delwri_queue() guarantee that the buffer cannot be queued to another list
- * before returning.
- */
-int
-xfs_buf_delwri_pushbuf(
- struct xfs_buf *bp,
- struct list_head *buffer_list)
-{
- int error;
-
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
- trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
-
- xfs_buf_lock(bp);
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
- bp->b_flags |= XBF_WRITE;
- xfs_buf_submit(bp);
-
- /*
- * The buffer is now locked, under I/O but still on the original delwri
- * queue. Wait for I/O completion, restore the DELWRI_Q flag and
- * return with the buffer unlocked and still on the original queue.
- */
- error = xfs_buf_iowait(bp);
- bp->b_flags |= _XBF_DELWRI_Q;
- xfs_buf_unlock(bp);
-
- return error;
-}
-
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
/*
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 9d2ab567cf81..15fc56948346 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
-extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
{
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 90139e0f3271..7fc54725c5f6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_buf_log_item, bli_item);
}
+static void
+xfs_buf_item_get_format(
+ struct xfs_buf_log_item *bip,
+ int count)
+{
+ ASSERT(bip->bli_formats == NULL);
+ bip->bli_format_count = count;
+
+ if (count == 1) {
+ bip->bli_formats = &bip->__bli_format;
+ return;
+ }
+
+ bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
+ GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static void
+xfs_buf_item_free_format(
+ struct xfs_buf_log_item *bip)
+{
+ if (bip->bli_formats != &bip->__bli_format) {
+ kfree(bip->bli_formats);
+ bip->bli_formats = NULL;
+ }
+}
+
+static void
+xfs_buf_item_free(
+ struct xfs_buf_log_item *bip)
+{
+ xfs_buf_item_free_format(bip);
+ kvfree(bip->bli_item.li_lv_shadow);
+ kmem_cache_free(xfs_buf_item_cache, bip);
+}
+
+/*
+ * xfs_buf_item_relse() is called when the buf log item is no longer needed.
+ */
+static void
+xfs_buf_item_relse(
+ struct xfs_buf_log_item *bip)
+{
+ struct xfs_buf *bp = bip->bli_buf;
+
+ trace_xfs_buf_item_relse(bp, _RET_IP_);
+
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
+ ASSERT(atomic_read(&bip->bli_refcount) == 0);
+
+ bp->b_log_item = NULL;
+ xfs_buf_rele(bp);
+ xfs_buf_item_free(bip);
+}
+
/* Is this log iovec plausibly large enough to contain the buffer log format? */
bool
xfs_buf_log_check_iovec(
@@ -390,6 +445,42 @@ xfs_buf_item_pin(
}
/*
+ * For a stale BLI, process all the necessary completions that must be
+ * performed when the final BLI reference goes away. The buffer will be
+ * referenced and locked here - we return to the caller with the buffer still
+ * referenced and locked for them to finalise processing of the buffer.
+ */
+static void
+xfs_buf_item_finish_stale(
+ struct xfs_buf_log_item *bip)
+{
+ struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_log_item *lip = &bip->bli_item;
+
+ ASSERT(bip->bli_flags & XFS_BLI_STALE);
+ ASSERT(xfs_buf_islocked(bp));
+ ASSERT(bp->b_flags & XBF_STALE);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(list_empty(&lip->li_trans));
+ ASSERT(!bp->b_transp);
+
+ if (bip->bli_flags & XFS_BLI_STALE_INODE) {
+ xfs_buf_item_done(bp);
+ xfs_buf_inode_iodone(bp);
+ ASSERT(list_empty(&bp->b_li_list));
+ return;
+ }
+
+ /*
+ * We may or may not be on the AIL here, xfs_trans_ail_delete() will do
+ * the right thing regardless of the situation in which we are called.
+ */
+ xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
+ xfs_buf_item_relse(bip);
+ ASSERT(bp->b_log_item == NULL);
+}
+
+/*
* This is called to unpin the buffer associated with the buf log item which was
* previously pinned with a call to xfs_buf_item_pin(). We enter this function
* with a buffer pin count, a buffer reference and a BLI reference.
@@ -438,13 +529,6 @@ xfs_buf_item_unpin(
}
if (stale) {
- ASSERT(bip->bli_flags & XFS_BLI_STALE);
- ASSERT(xfs_buf_islocked(bp));
- ASSERT(bp->b_flags & XBF_STALE);
- ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
- ASSERT(list_empty(&lip->li_trans));
- ASSERT(!bp->b_transp);
-
trace_xfs_buf_item_unpin_stale(bip);
/*
@@ -455,22 +539,7 @@ xfs_buf_item_unpin(
* processing is complete.
*/
xfs_buf_rele(bp);
-
- /*
- * If we get called here because of an IO error, we may or may
- * not have the item on the AIL. xfs_trans_ail_delete() will
- * take care of that situation. xfs_trans_ail_delete() drops
- * the AIL lock.
- */
- if (bip->bli_flags & XFS_BLI_STALE_INODE) {
- xfs_buf_item_done(bp);
- xfs_buf_inode_iodone(bp);
- ASSERT(list_empty(&bp->b_li_list));
- } else {
- xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
- xfs_buf_item_relse(bp);
- ASSERT(bp->b_log_item == NULL);
- }
+ xfs_buf_item_finish_stale(bip);
xfs_buf_relse(bp);
return;
}
@@ -543,43 +612,42 @@ xfs_buf_item_push(
* Drop the buffer log item refcount and take appropriate action. This helper
* determines whether the bli must be freed or not, since a decrement to zero
* does not necessarily mean the bli is unused.
- *
- * Return true if the bli is freed, false otherwise.
*/
-bool
+void
xfs_buf_item_put(
struct xfs_buf_log_item *bip)
{
- struct xfs_log_item *lip = &bip->bli_item;
- bool aborted;
- bool dirty;
+
+ ASSERT(xfs_buf_islocked(bip->bli_buf));
/* drop the bli ref and return if it wasn't the last one */
if (!atomic_dec_and_test(&bip->bli_refcount))
- return false;
+ return;
- /*
- * We dropped the last ref and must free the item if clean or aborted.
- * If the bli is dirty and non-aborted, the buffer was clean in the
- * transaction but still awaiting writeback from previous changes. In
- * that case, the bli is freed on buffer writeback completion.
- */
- aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
- xlog_is_shutdown(lip->li_log);
- dirty = bip->bli_flags & XFS_BLI_DIRTY;
- if (dirty && !aborted)
- return false;
+ /* If the BLI is in the AIL, then it is still dirty and in use */
+ if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) {
+ ASSERT(bip->bli_flags & XFS_BLI_DIRTY);
+ return;
+ }
/*
- * The bli is aborted or clean. An aborted item may be in the AIL
- * regardless of dirty state. For example, consider an aborted
- * transaction that invalidated a dirty bli and cleared the dirty
- * state.
+ * In shutdown conditions, we can be asked to free a dirty BLI that
+ * isn't in the AIL. This can occur due to a checkpoint aborting a BLI
+ * instead of inserting it into the AIL at checkpoint IO completion. If
+ * there's another bli reference (e.g. a btree cursor holds a clean
+ * reference) and it is released via xfs_trans_brelse(), we can get here
+ * with that aborted, dirty BLI. In this case, it is safe to free the
+ * dirty BLI immediately, as it is not in the AIL and there are no
+ * other references to it.
+ *
+ * We should never get here with a stale BLI via that path as
+ * xfs_trans_brelse() specifically holds onto stale buffers rather than
+ * releasing them.
*/
- if (aborted)
- xfs_trans_ail_delete(lip, 0);
- xfs_buf_item_relse(bip->bli_buf);
- return true;
+ ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) ||
+ test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags));
+ ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+ xfs_buf_item_relse(bip);
}
/*
@@ -600,6 +668,15 @@ xfs_buf_item_put(
* if necessary but do not unlock the buffer. This is for support of
* xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
* free the item.
+ *
+ * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must*
+ * perform a completion abort of any objects attached to the buffer for IO
+ * tracking purposes. This generally only happens in shutdown situations,
+ * normally xfs_buf_item_unpin() will drop the last BLI reference and perform
+ * completion processing. However, because transaction completion can race with
+ * checkpoint completion during a shutdown, this release context may end up
+ * being the last active reference to the BLI and so needs to perform this
+ * cleanup.
*/
STATIC void
xfs_buf_item_release(
@@ -607,18 +684,19 @@ xfs_buf_item_release(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- bool released;
bool hold = bip->bli_flags & XFS_BLI_HOLD;
bool stale = bip->bli_flags & XFS_BLI_STALE;
-#if defined(DEBUG) || defined(XFS_WARN)
- bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
- bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
bool aborted = test_bit(XFS_LI_ABORTED,
&lip->li_flags);
+ bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
+#if defined(DEBUG) || defined(XFS_WARN)
+ bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
#endif
trace_xfs_buf_item_release(bip);
+ ASSERT(xfs_buf_islocked(bp));
+
/*
* The bli dirty state should match whether the blf has logged segments
* except for ordered buffers, where only the bli should be dirty.
@@ -634,16 +712,56 @@ xfs_buf_item_release(
bp->b_transp = NULL;
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
+ /* If there are other references, then we have nothing to do. */
+ if (!atomic_dec_and_test(&bip->bli_refcount))
+ goto out_release;
+
+ /*
+ * Stale buffer completion frees the BLI, unlocks and releases the
+ * buffer. Neither the BLI nor the buffer is safe to reference after this
+ * call, so there's nothing more we need to do here.
+ *
+ * If we get here with a stale buffer and references to the BLI remain,
+ * we must not unlock the buffer as the last BLI reference owns lock
+ * context, not us.
+ */
+ if (stale) {
+ xfs_buf_item_finish_stale(bip);
+ xfs_buf_relse(bp);
+ ASSERT(!hold);
+ return;
+ }
+
/*
- * Unref the item and unlock the buffer unless held or stale. Stale
- * buffers remain locked until final unpin unless the bli is freed by
- * the unref call. The latter implies shutdown because buffer
- * invalidation dirties the bli and transaction.
+ * Dirty or clean, aborted items are done and need to be removed from
+ * the AIL and released. This frees the BLI, but leaves the buffer
+ * locked and referenced.
*/
- released = xfs_buf_item_put(bip);
- if (hold || (stale && !released))
+ if (aborted || xlog_is_shutdown(lip->li_log)) {
+ ASSERT(list_empty(&bip->bli_buf->b_li_list));
+ xfs_buf_item_done(bp);
+ goto out_release;
+ }
+
+ /*
+ * Clean, unreferenced BLIs can be immediately freed, leaving the buffer
+ * locked and referenced.
+ *
+ * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback.
+ */
+ if (!dirty)
+ xfs_buf_item_relse(bip);
+ else
+ ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
+
+ /* Not safe to reference the BLI from here */
+out_release:
+ /*
+ * If we get here with a stale buffer, we must not unlock the
+ * buffer as the last BLI reference owns lock context, not us.
+ */
+ if (stale || hold)
return;
- ASSERT(!stale || aborted);
xfs_buf_relse(bp);
}
@@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_push = xfs_buf_item_push,
};
-STATIC void
-xfs_buf_item_get_format(
- struct xfs_buf_log_item *bip,
- int count)
-{
- ASSERT(bip->bli_formats == NULL);
- bip->bli_format_count = count;
-
- if (count == 1) {
- bip->bli_formats = &bip->__bli_format;
- return;
- }
-
- bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
- GFP_KERNEL | __GFP_NOFAIL);
-}
-
-STATIC void
-xfs_buf_item_free_format(
- struct xfs_buf_log_item *bip)
-{
- if (bip->bli_formats != &bip->__bli_format) {
- kfree(bip->bli_formats);
- bip->bli_formats = NULL;
- }
-}
-
/*
* Allocate a new buf log item to go with the given buffer.
* Set the buffer's b_log_item field to point to the new
@@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format(
return false;
}
-STATIC void
-xfs_buf_item_free(
- struct xfs_buf_log_item *bip)
-{
- xfs_buf_item_free_format(bip);
- kvfree(bip->bli_item.li_lv_shadow);
- kmem_cache_free(xfs_buf_item_cache, bip);
-}
-
-/*
- * xfs_buf_item_relse() is called when the buf log item is no longer needed.
- */
-void
-xfs_buf_item_relse(
- struct xfs_buf *bp)
-{
- struct xfs_buf_log_item *bip = bp->b_log_item;
-
- trace_xfs_buf_item_relse(bp, _RET_IP_);
- ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
-
- if (atomic_read(&bip->bli_refcount))
- return;
- bp->b_log_item = NULL;
- xfs_buf_rele(bp);
- xfs_buf_item_free(bip);
-}
-
void
xfs_buf_item_done(
struct xfs_buf *bp)
@@ -1023,5 +1086,5 @@ xfs_buf_item_done(
xfs_trans_ail_delete(&bp->b_log_item->bli_item,
(bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
SHUTDOWN_CORRUPT_INCORE);
- xfs_buf_item_relse(bp);
+ xfs_buf_item_relse(bp->b_log_item);
}
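
The reworked xfs_buf_item_put() above makes the last-reference drop a pure decision point: an AIL-resident item stays alive for writeback, anything else is freed immediately. A toy stdatomic sketch of that shape (the struct and in_ail flag are invented analogs, not the kernel types):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

/* Refcounted log-item analog: only the thread that drops the final
 * reference decides the item's fate, and an item still tracked by the
 * AIL (in_ail, standing in for XFS_LI_IN_AIL) is left for later. */
struct item {
    atomic_int refcount;
    bool in_ail;
};

static void item_put(struct item *it)
{
    if (atomic_fetch_sub(&it->refcount, 1) != 1)
        return;         /* not the last reference */
    if (it->in_ail)
        return;         /* dirty and tracked; freed at AIL removal */
    free(it);
}

int main(void)
{
    struct item *it = malloc(sizeof(*it));

    if (!it)
        return 1;
    atomic_init(&it->refcount, 2);
    it->in_ail = false;
    item_put(it);   /* drops 2 -> 1, nothing freed */
    item_put(it);   /* last ref, not in the AIL: freed here */
    return 0;
}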
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index e10e324cd245..416890b84f8c 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -49,8 +49,7 @@ struct xfs_buf_log_item {
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_done(struct xfs_buf *bp);
-void xfs_buf_item_relse(struct xfs_buf *);
-bool xfs_buf_item_put(struct xfs_buf_log_item *);
+void xfs_buf_item_put(struct xfs_buf_log_item *bip);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_inode_iodone(struct xfs_buf *);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index b4e32f0860b7..0bd8022e47b4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1398,11 +1398,9 @@ xfs_qm_dqflush(
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(!completion_done(&dqp->q_flush));
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
trace_xfs_dqflush(dqp);
-
- xfs_qm_dqunpin_wait(dqp);
-
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 48254a72071b..0b41b18debf3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range(
}
#define XFS_FALLOC_FL_SUPPORTED \
- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
- FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
+ (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \
+ FALLOC_FL_UNSHARE_RANGE)
STATIC long
__xfs_file_fallocate(
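
FALLOC_FL_ALLOCATE_RANGE is defined as 0 in the uapi headers, so adding it to the mask documents the default allocation mode rather than enabling anything new. For reference, a small userspace caller exercising two of the supported modes (path and sizes are arbitrary):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fd = open("/tmp/falloc_demo", O_RDWR | O_CREAT, 0600);

    if (fd < 0)
        return 1;
    /* Preallocate 1 MiB without growing i_size, then punch out the
     * first 4 KiB; both flag combinations appear in the mask above. */
    if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
        perror("fallocate");
    if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 4096) < 0)
        perror("punch hole");
    close(fd);
    return 0;
}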
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 726e29b837e6..bbc2f2973dcc 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -979,7 +979,15 @@ xfs_reclaim_inode(
*/
if (xlog_is_shutdown(ip->i_mount->m_log)) {
xfs_iunpin_wait(ip);
+ /*
+ * Avoid an ABBA deadlock on the inode cluster buffer vs
+ * concurrent xfs_ifree_cluster() trying to mark the inode
+ * stale. We don't need the inode locked to run the flush abort
+ * code, but the flush abort needs to lock the cluster buffer.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iflush_shutdown_abort(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
goto reclaim;
}
if (xfs_ipincount(ip))
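
The unlock/relock dance above breaks the cycle where this path holds the inode lock and wants the cluster buffer lock, while xfs_ifree_cluster() holds the buffer lock and wants the inode lock. A minimal pthread sketch of the workaround, with invented lock and function names:

#include <pthread.h>

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cluster_buf_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for xfs_iflush_shutdown_abort(): takes the buffer lock. */
static void flush_abort(void)
{
    pthread_mutex_lock(&cluster_buf_lock);
    /* ... abort processing ... */
    pthread_mutex_unlock(&cluster_buf_lock);
}

/* Mirrors the fix: drop lock A before calling into code that takes
 * lock B, because another thread may hold B while waiting on A. */
static void reclaim_path(void)
{
    pthread_mutex_lock(&inode_lock);
    /* ... work that needs the inode lock ... */
    pthread_mutex_unlock(&inode_lock);
    flush_abort();
    pthread_mutex_lock(&inode_lock);
    /* ... continue reclaim with the inode locked again ... */
    pthread_mutex_unlock(&inode_lock);
}

int main(void)
{
    reclaim_path();
    return 0;
}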
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ee3e0f284287..761a996a857c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1635,7 +1635,7 @@ retry:
iip = ip->i_itemp;
if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
ASSERT(!list_empty(&iip->ili_item.li_bio_list));
- ASSERT(iip->ili_last_fields);
+ ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log));
goto out_iunlock;
}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c6cb0b6b9e46..285e27ff89e2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -758,11 +758,14 @@ xfs_inode_item_push(
* completed and items removed from the AIL before the next push
* attempt.
*/
+ trace_xfs_inode_push_stale(ip, _RET_IP_);
return XFS_ITEM_PINNED;
}
- if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp))
+ if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) {
+ trace_xfs_inode_push_pinned(ip, _RET_IP_);
return XFS_ITEM_PINNED;
+ }
if (xfs_iflags_test(ip, XFS_IFLUSHING))
return XFS_ITEM_FLUSHING;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f66d2d430e4f..a80cb6b9969a 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -793,8 +793,10 @@ xlog_cil_ail_insert(
struct xfs_log_item *lip = lv->lv_item;
xfs_lsn_t item_lsn;
- if (aborted)
+ if (aborted) {
+ trace_xlog_ail_insert_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
+ }
if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
lip->li_ops->iop_release(lip);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 08443ceec329..866c71d9fbae 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -320,7 +320,7 @@ xfs_mru_cache_create(
xfs_mru_cache_free_func_t free_func)
{
struct xfs_mru_cache *mru = NULL;
- int err = 0, grp;
+ int grp;
unsigned int grp_time;
if (mrup)
@@ -341,8 +341,8 @@ xfs_mru_cache_create(
mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists),
GFP_KERNEL | __GFP_NOFAIL);
if (!mru->lists) {
- err = -ENOMEM;
- goto exit;
+ kfree(mru);
+ return -ENOMEM;
}
for (grp = 0; grp < mru->grp_count; grp++)
@@ -361,14 +361,7 @@ xfs_mru_cache_create(
mru->free_func = free_func;
mru->data = data;
*mrup = mru;
-
-exit:
- if (err && mru && mru->lists)
- kfree(mru->lists);
- if (err && mru)
- kfree(mru);
-
- return err;
+ return 0;
}
/*
@@ -425,10 +418,6 @@ xfs_mru_cache_insert(
{
int error = -EINVAL;
- ASSERT(mru && mru->lists);
- if (!mru || !mru->lists)
- goto out_free;
-
error = -ENOMEM;
if (radix_tree_preload(GFP_KERNEL))
goto out_free;
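
With radix_tree_preload() failure handled where it occurs, the lists allocation is the only failure point left in xfs_mru_cache_create(), so the shared exit label and its conditional frees can go. The reduced shape as a standalone sketch (names invented):

#include <stdlib.h>

struct cache { void **lists; };

/* One allocation, one failure point: free and return directly rather
 * than routing every path through a shared 'exit:' label that has to
 * re-derive what was allocated. */
static int cache_create(struct cache **out, int nlists)
{
    struct cache *c = calloc(1, sizeof(*c));

    if (!c)
        return -1;
    c->lists = calloc(nlists, sizeof(*c->lists));
    if (!c->lists) {
        free(c);    /* direct unwind, no goto bookkeeping */
        return -1;
    }
    *out = c;
    return 0;
}

int main(void)
{
    struct cache *c;

    return cache_create(&c, 8) ? 1 : 0;
}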
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 417439b58785..fa135ac26471 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,6 +134,7 @@ xfs_qm_dqpurge(
dqp->q_flags |= XFS_DQFLAG_FREEING;
+ xfs_qm_dqunpin_wait(dqp);
xfs_dqflock(dqp);
/*
@@ -465,6 +466,7 @@ xfs_qm_dquot_isolate(
struct xfs_dquot *dqp = container_of(item,
struct xfs_dquot, q_lru);
struct xfs_qm_isolate *isol = arg;
+ enum lru_status ret = LRU_SKIP;
if (!xfs_dqlock_nowait(dqp))
goto out_miss_busy;
@@ -478,6 +480,16 @@ xfs_qm_dquot_isolate(
goto out_miss_unlock;
/*
+ * If the dquot is pinned or dirty, rotate it to the end of the LRU to
+ * give some time for it to be cleaned before we try to isolate it
+ * again.
+ */
+ ret = LRU_ROTATE;
+ if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) {
+ goto out_miss_unlock;
+ }
+
+ /*
* This dquot has acquired a reference in the meantime; remove it from
* the freelist and try again.
*/
@@ -492,41 +504,14 @@ xfs_qm_dquot_isolate(
}
/*
- * If the dquot is dirty, flush it. If it's already being flushed, just
- * skip it so there is time for the IO to complete before we try to
- * reclaim it again on the next LRU pass.
+ * The dquot may still be under IO, in which case the flush lock will be
+ * held. If we can't get the flush lock now, just skip over the dquot as
+ * if it was dirty.
*/
if (!xfs_dqflock_nowait(dqp))
goto out_miss_unlock;
- if (XFS_DQ_IS_DIRTY(dqp)) {
- struct xfs_buf *bp = NULL;
- int error;
-
- trace_xfs_dqreclaim_dirty(dqp);
-
- /* we have to drop the LRU lock to flush the dquot */
- spin_unlock(&lru->lock);
-
- error = xfs_dquot_use_attached_buf(dqp, &bp);
- if (!bp || error == -EAGAIN) {
- xfs_dqfunlock(dqp);
- goto out_unlock_dirty;
- }
-
- /*
- * dqflush completes dqflock on error, and the delwri ioend
- * does it on success.
- */
- error = xfs_qm_dqflush(dqp, bp);
- if (error)
- goto out_unlock_dirty;
-
- xfs_buf_delwri_queue(bp, &isol->buffers);
- xfs_buf_relse(bp);
- goto out_unlock_dirty;
- }
-
+ ASSERT(!XFS_DQ_IS_DIRTY(dqp));
xfs_dquot_detach_buf(dqp);
xfs_dqfunlock(dqp);
@@ -548,13 +533,7 @@ out_miss_unlock:
out_miss_busy:
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- return LRU_SKIP;
-
-out_unlock_dirty:
- trace_xfs_dqreclaim_busy(dqp);
- XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- xfs_dqunlock(dqp);
- return LRU_RETRY;
+ return ret;
}
static unsigned long
@@ -1486,7 +1465,6 @@ xfs_qm_flush_one(
struct xfs_dquot *dqp,
void *data)
{
- struct xfs_mount *mp = dqp->q_mount;
struct list_head *buffer_list = data;
struct xfs_buf *bp = NULL;
int error = 0;
@@ -1497,34 +1475,8 @@ xfs_qm_flush_one(
if (!XFS_DQ_IS_DIRTY(dqp))
goto out_unlock;
- /*
- * The only way the dquot is already flush locked by the time quotacheck
- * gets here is if reclaim flushed it before the dqadjust walk dirtied
- * it for the final time. Quotacheck collects all dquot bufs in the
- * local delwri queue before dquots are dirtied, so reclaim can't have
- * possibly queued it for I/O. The only way out is to push the buffer to
- * cycle the flush lock.
- */
- if (!xfs_dqflock_nowait(dqp)) {
- /* buf is pinned in-core by delwri list */
- error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp);
- if (error)
- goto out_unlock;
-
- if (!(bp->b_flags & _XBF_DELWRI_Q)) {
- error = -EAGAIN;
- xfs_buf_relse(bp);
- goto out_unlock;
- }
- xfs_buf_unlock(bp);
-
- xfs_buf_delwri_pushbuf(bp, buffer_list);
- xfs_buf_rele(bp);
-
- error = -EAGAIN;
- goto out_unlock;
- }
+ xfs_qm_dqunpin_wait(dqp);
+ xfs_dqflock(dqp);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error)
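
The isolate callback now reports busy dquots with LRU_ROTATE so they drift to the tail of the LRU and are revisited once writeback has had a chance to clean them, instead of being flushed from reclaim context. A toy sketch of the rotate-on-busy decision (the enum values echo the kernel's list_lru names; the struct is invented):

#include <stdbool.h>
#include <stdio.h>

enum lru_status { LRU_REMOVED, LRU_ROTATE, LRU_SKIP };

struct dq { bool dirty, pinned; };

/* In the spirit of xfs_qm_dquot_isolate(): dirty or pinned entries are
 * rotated for a later pass; only clean, idle entries are reclaimed. */
static enum lru_status isolate(const struct dq *d)
{
    if (d->dirty || d->pinned)
        return LRU_ROTATE;  /* give writeback time, revisit later */
    return LRU_REMOVED;     /* safe to reclaim now */
}

int main(void)
{
    struct dq busy = { .dirty = true }, idle = { 0 };

    printf("busy=%d idle=%d\n", isolate(&busy), isolate(&idle));
    return 0;
}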
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6484c596ecea..736eb0924573 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom(
kfree(nmp);
+ trace_xfs_growfs_check_rtgeom(mp, min_logfsbs);
+
if (min_logfsbs > mp->m_sb.sb_logblocks)
return -EINVAL;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0bc4b5489078..bb0a82635a77 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2020,14 +2020,13 @@ xfs_remount_rw(
int error;
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
- bdev_read_only(mp->m_logdev_targp->bt_bdev)) {
+ xfs_readonly_buftarg(mp->m_logdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only logdev");
return -EACCES;
}
- if (mp->m_rtdev_targp &&
- bdev_read_only(mp->m_rtdev_targp->bt_bdev)) {
+ if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only rtdev");
return -EACCES;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 01d284a1c759..ba45d801df1c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_iodone_async);
@@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__field(xfs_ino_t, ino)
__field(int, count)
__field(int, pincount)
+ __field(unsigned long, iflags)
__field(unsigned long, caller_ip)
),
TP_fast_assign(
@@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__entry->ino = ip->i_ino;
__entry->count = atomic_read(&VFS_I(ip)->i_count);
__entry->pincount = atomic_read(&ip->i_pincount);
+ __entry->iflags = ip->i_flags;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS",
+ TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->count,
__entry->pincount,
+ __entry->iflags,
(char *)__entry->caller_ip)
)
@@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele);
DEFINE_IREF_EVENT(xfs_inode_pin);
DEFINE_IREF_EVENT(xfs_inode_unpin);
DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+DEFINE_IREF_EVENT(xfs_inode_push_pinned);
+DEFINE_IREF_EVENT(xfs_inode_push_stale);
DECLARE_EVENT_CLASS(xfs_namespace_class,
TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name),
@@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
+DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort);
+DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort);
DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c6657072361a..b4a07af513ba 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -742,8 +742,10 @@ xfs_trans_free_items(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
- if (abort)
+ if (abort) {
+ trace_xfs_trans_free_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
+ }
if (lip->li_ops->iop_release)
lip->li_ops->iop_release(lip);
}
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 80add26c0111..01315ed75502 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -727,7 +727,7 @@ xfs_select_zone(
for (;;) {
prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
- if (oz)
+ if (oz || xfs_is_shutdown(mp))
break;
schedule();
}
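
The added shutdown test keeps the waiter from sleeping forever once no zone can ever become available. The same loop in its classic condition-variable form, as a userspace sketch (names invented):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t zone_wait = PTHREAD_COND_INITIALIZER;
static bool fs_shutdown;            /* analog of xfs_is_shutdown() */

static void *try_get_zone(void) { return NULL; }    /* stub */

/* Mirrors the fixed xfs_select_zone() loop: wake on "got a zone" or
 * "filesystem shut down"; without the second condition a shutdown
 * leaves this thread blocked for good. */
static void *select_zone(void)
{
    void *oz;

    pthread_mutex_lock(&lock);
    for (;;) {
        oz = try_get_zone();
        if (oz || fs_shutdown)
            break;
        pthread_cond_wait(&zone_wait, &lock);
    }
    pthread_mutex_unlock(&lock);
    return oz;
}

int main(void)
{
    fs_shutdown = true; /* ensure the sketch terminates */
    select_zone();
    return 0;
}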
@@ -777,26 +777,6 @@ xfs_mark_rtg_boundary(
ioend->io_flags |= IOMAP_IOEND_BOUNDARY;
}
-static void
-xfs_submit_zoned_bio(
- struct iomap_ioend *ioend,
- struct xfs_open_zone *oz,
- bool is_seq)
-{
- ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
- ioend->io_private = oz;
- atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
-
- if (is_seq) {
- ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
- ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
- } else {
- xfs_mark_rtg_boundary(ioend);
- }
-
- submit_bio(&ioend->io_bio);
-}
-
/*
* Cache the last zone written to for an inode so that it is considered first
* for subsequent writes.
@@ -891,6 +871,26 @@ xfs_zone_cache_create_association(
xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
}
+static void
+xfs_submit_zoned_bio(
+ struct iomap_ioend *ioend,
+ struct xfs_open_zone *oz,
+ bool is_seq)
+{
+ ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
+ ioend->io_private = oz;
+ atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
+
+ if (is_seq) {
+ ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
+ ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
+ } else {
+ xfs_mark_rtg_boundary(ioend);
+ }
+
+ submit_bio(&ioend->io_bio);
+}
+
void
xfs_zone_alloc_and_submit(
struct iomap_ioend *ioend,