summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-09-23 18:42:30 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-09-23 20:03:23 -0400
commita053ebfb8c89e023a44c365e369f4053cfc53376 (patch)
treec39f6a6689bbdeee358e824971d3e186ac4c3877
parent731926b5e5fc28752433f830569d228513cceea6 (diff)
Update bcachefs sources to f9c612bbf82d bcachefs: Fixes for building in userspace
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--.bcachefs_revision2
-rw-r--r--Makefile.compiler8
-rw-r--r--cmd_dump.c10
-rw-r--r--cmd_kill_btree_node.c10
-rw-r--r--cmd_migrate.c13
-rw-r--r--include/linux/blkdev.h49
-rw-r--r--include/linux/compiler.h1
-rw-r--r--include/linux/rcupdate.h2
-rw-r--r--libbcachefs/acl.c103
-rw-r--r--libbcachefs/acl.h6
-rw-r--r--libbcachefs/alloc_background.c161
-rw-r--r--libbcachefs/alloc_foreground.c19
-rw-r--r--libbcachefs/backpointers.c31
-rw-r--r--libbcachefs/bcachefs.h9
-rw-r--r--libbcachefs/bcachefs_format.h121
-rw-r--r--libbcachefs/bkey.c33
-rw-r--r--libbcachefs/bkey.h6
-rw-r--r--libbcachefs/bkey_methods.c17
-rw-r--r--libbcachefs/bkey_sort.h16
-rw-r--r--libbcachefs/bset.c27
-rw-r--r--libbcachefs/btree_cache.c37
-rw-r--r--libbcachefs/btree_gc.c141
-rw-r--r--libbcachefs/btree_io.c80
-rw-r--r--libbcachefs/btree_io.h2
-rw-r--r--libbcachefs/btree_iter.c135
-rw-r--r--libbcachefs/btree_iter.h83
-rw-r--r--libbcachefs/btree_key_cache.c28
-rw-r--r--libbcachefs/btree_locking.h2
-rw-r--r--libbcachefs/btree_trans_commit.c22
-rw-r--r--libbcachefs/btree_types.h38
-rw-r--r--libbcachefs/btree_update.c125
-rw-r--r--libbcachefs/btree_update.h35
-rw-r--r--libbcachefs/btree_update_interior.c112
-rw-r--r--libbcachefs/btree_write_buffer.c2
-rw-r--r--libbcachefs/buckets.c12
-rw-r--r--libbcachefs/buckets.h35
-rw-r--r--libbcachefs/buckets_waiting_for_journal.c2
-rw-r--r--libbcachefs/chardev.c55
-rw-r--r--libbcachefs/checksum.c27
-rw-r--r--libbcachefs/checksum.h7
-rw-r--r--libbcachefs/compress.c8
-rw-r--r--libbcachefs/counters.c2
-rw-r--r--libbcachefs/data_update.c8
-rw-r--r--libbcachefs/data_update.h2
-rw-r--r--libbcachefs/debug.c57
-rw-r--r--libbcachefs/dirent.c31
-rw-r--r--libbcachefs/disk_groups.c15
-rw-r--r--libbcachefs/ec.c40
-rw-r--r--libbcachefs/ec.h2
-rw-r--r--libbcachefs/errcode.c9
-rw-r--r--libbcachefs/errcode.h14
-rw-r--r--libbcachefs/error.c1
-rw-r--r--libbcachefs/fs-io-buffered.c97
-rw-r--r--libbcachefs/fs-io-direct.c18
-rw-r--r--libbcachefs/fs-io-pagecache.c37
-rw-r--r--libbcachefs/fs-io.c301
-rw-r--r--libbcachefs/fs-io.h4
-rw-r--r--libbcachefs/fs-ioctl.c5
-rw-r--r--libbcachefs/fs-ioctl.h6
-rw-r--r--libbcachefs/fs.c205
-rw-r--r--libbcachefs/fs.h2
-rw-r--r--libbcachefs/fsck.c186
-rw-r--r--libbcachefs/inode.c76
-rw-r--r--libbcachefs/inode.h3
-rw-r--r--libbcachefs/io.h202
-rw-r--r--libbcachefs/io_misc.c497
-rw-r--r--libbcachefs/io_misc.h34
-rw-r--r--libbcachefs/io_read.c1210
-rw-r--r--libbcachefs/io_read.h158
-rw-r--r--libbcachefs/io_write.c (renamed from libbcachefs/io.c)1448
-rw-r--r--libbcachefs/io_write.h110
-rw-r--r--libbcachefs/io_write_types.h (renamed from libbcachefs/io_types.h)75
-rw-r--r--libbcachefs/journal.c35
-rw-r--r--libbcachefs/journal.h34
-rw-r--r--libbcachefs/journal_io.c34
-rw-r--r--libbcachefs/journal_reclaim.c24
-rw-r--r--libbcachefs/journal_reclaim.h3
-rw-r--r--libbcachefs/journal_seq_blacklist.c12
-rw-r--r--libbcachefs/logged_ops.c110
-rw-r--r--libbcachefs/logged_ops.h20
-rw-r--r--libbcachefs/lru.c4
-rw-r--r--libbcachefs/migrate.c29
-rw-r--r--libbcachefs/move.c45
-rw-r--r--libbcachefs/move.h1
-rw-r--r--libbcachefs/movinggc.c51
-rw-r--r--libbcachefs/opts.c3
-rw-r--r--libbcachefs/opts.h2
-rw-r--r--libbcachefs/printbuf.c66
-rw-r--r--libbcachefs/quota.c19
-rw-r--r--libbcachefs/rebalance.c4
-rw-r--r--libbcachefs/recovery.c25
-rw-r--r--libbcachefs/recovery_types.h1
-rw-r--r--libbcachefs/reflink.c44
-rw-r--r--libbcachefs/replicas.c2
-rw-r--r--libbcachefs/six.c1
-rw-r--r--libbcachefs/snapshot.c93
-rw-r--r--libbcachefs/snapshot.h2
-rw-r--r--libbcachefs/subvolume.c13
-rw-r--r--libbcachefs/subvolume.h2
-rw-r--r--libbcachefs/super-io.c32
-rw-r--r--libbcachefs/super.c78
-rw-r--r--libbcachefs/super_types.h3
-rw-r--r--libbcachefs/sysfs.c42
-rw-r--r--libbcachefs/tests.c262
-rw-r--r--libbcachefs/trace.h19
-rw-r--r--libbcachefs/util.c25
-rw-r--r--libbcachefs/util.h26
-rw-r--r--libbcachefs/varint.c24
-rw-r--r--libbcachefs/vstructs.h6
-rw-r--r--libbcachefs/xattr.c38
-rw-r--r--linux/blkdev.c16
-rw-r--r--rust-src/bch_bindgen/src/bkey.rs4
-rw-r--r--rust-src/bch_bindgen/src/btree.rs15
-rw-r--r--rust-src/bch_bindgen/src/libbcachefs_wrapper.h8
114 files changed, 4091 insertions, 3673 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 57c74ea..0c7b855 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-e7f62157681d96386dc500609149b9685358a2b0
+f9c612bbf82da87d7d4a005310c5213db00e22de
diff --git a/Makefile.compiler b/Makefile.compiler
index 7aa1fbc..8fcb427 100644
--- a/Makefile.compiler
+++ b/Makefile.compiler
@@ -32,13 +32,13 @@ try-run = $(shell set -e; \
# Usage: aflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
as-option = $(call try-run,\
- $(CC) -Werror $(KBUILD_AFLAGS) $(1) -c -x assembler-with-cpp /dev/null -o "$$TMP",$(1),$(2))
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) $(1) -c -x assembler-with-cpp /dev/null -o "$$TMP",$(1),$(2))
# as-instr
# Usage: aflags-y += $(call as-instr,instr,option1,option2)
as-instr = $(call try-run,\
- printf "%b\n" "$(1)" | $(CC) -Werror $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
+ printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
# __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
@@ -72,7 +72,3 @@ clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1)
# ld-option
# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
-
-# ld-ifversion
-# Usage: $(call ld-ifversion, -ge, 22252, y)
-ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4))
diff --git a/cmd_dump.c b/cmd_dump.c
index bf570dc..0d34923 100644
--- a/cmd_dump.c
+++ b/cmd_dump.c
@@ -61,13 +61,11 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
for (i = 0; i < BTREE_ID_NR; i++) {
const struct bch_extent_ptr *ptr;
struct bkey_ptrs_c ptrs;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct btree *b;
- bch2_trans_init(&trans, c, 0, 0);
-
- __for_each_btree_node(&trans, iter, i, POS_MIN, 0, 1, 0, b, ret) {
+ __for_each_btree_node(trans, iter, i, POS_MIN, 0, 1, 0, b, ret) {
struct btree_node_iter iter;
struct bkey u;
struct bkey_s_c k;
@@ -97,8 +95,8 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
btree_bytes(c));
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
}
qcow2_write_image(ca->disk_sb.bdev->bd_buffered_fd, fd, &data,
diff --git a/cmd_kill_btree_node.c b/cmd_kill_btree_node.c
index e9b8265..83389bc 100644
--- a/cmd_kill_btree_node.c
+++ b/cmd_kill_btree_node.c
@@ -64,7 +64,7 @@ int cmd_kill_btree_node(int argc, char *argv[])
if (IS_ERR(c))
die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct btree *b;
int ret;
@@ -74,9 +74,7 @@ int cmd_kill_btree_node(int argc, char *argv[])
if (ret)
die("error %s from posix_memalign", bch2_err_str(ret));
- bch2_trans_init(&trans, c, 0, 0);
-
- __for_each_btree_node(&trans, iter, btree_id, POS_MIN, 0, level, 0, b, ret) {
+ __for_each_btree_node(trans, iter, btree_id, POS_MIN, 0, level, 0, b, ret) {
if (b->c.level != level)
continue;
@@ -113,8 +111,8 @@ int cmd_kill_btree_node(int argc, char *argv[])
bch_err(c, "node at specified index not found");
ret = EXIT_FAILURE;
done:
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
bch2_fs_stop(c);
return ret;
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 3958ba6..85ab96c 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -33,7 +33,7 @@
#include "libbcachefs/errcode.h"
#include "libbcachefs/fs-common.h"
#include "libbcachefs/inode.h"
-#include "libbcachefs/io.h"
+#include "libbcachefs/io_write.h"
#include "libbcachefs/replicas.h"
#include "libbcachefs/str_hash.h"
#include "libbcachefs/super.h"
@@ -126,7 +126,7 @@ static void update_inode(struct bch_fs *c,
bch2_inode_pack(&packed, inode);
packed.inode.k.p.snapshot = U32_MAX;
ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
- NULL, NULL, 0);
+ NULL, 0);
if (ret)
die("error updating inode: %s", bch2_err_str(ret));
}
@@ -140,7 +140,7 @@ static void create_link(struct bch_fs *c,
struct bch_inode_unpacked inode;
int ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_link_trans(&trans,
+ bch2_link_trans(trans,
(subvol_inum) { 1, parent->bi_inum }, &parent_u,
(subvol_inum) { 1, inum }, &inode, &qstr));
if (ret)
@@ -159,7 +159,7 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c,
bch2_inode_init_early(c, &new_inode);
int ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_create_trans(&trans,
+ bch2_create_trans(trans,
(subvol_inum) { 1, parent->bi_inum }, parent,
&new_inode, &qstr,
uid, gid, mode, rdev, NULL, NULL,
@@ -232,7 +232,7 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
struct bch_inode_unpacked inode_u;
int ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_xattr_set(&trans,
+ bch2_xattr_set(trans,
(subvol_inum) { 1, dst->bi_inum },
&inode_u, &hash_info, attr,
val, val_size, h->flags, 0));
@@ -339,8 +339,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
die("error reserving space in new filesystem: %s",
bch2_err_str(ret));
- ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i,
- &res, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0);
if (ret)
die("btree insert error %s", bch2_err_str(ret));
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7d378ab..3914311 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -6,6 +6,8 @@
#include <linux/kobject.h>
#include <linux/types.h>
+#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX)
+
#define BIO_MAX_VECS 256U
typedef unsigned fmode_t;
@@ -21,30 +23,20 @@ struct user_namespace;
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
-/* file is open for reading */
-#define FMODE_READ ((__force fmode_t)0x1)
-/* file is open for writing */
-#define FMODE_WRITE ((__force fmode_t)0x2)
-/* file is seekable */
-#define FMODE_LSEEK ((__force fmode_t)0x4)
-/* file can be accessed using pread */
-#define FMODE_PREAD ((__force fmode_t)0x8)
-/* file can be accessed using pwrite */
-#define FMODE_PWRITE ((__force fmode_t)0x10)
-/* File is opened for execution with sys_execve / sys_uselib */
-#define FMODE_EXEC ((__force fmode_t)0x20)
-/* File is opened with O_NDELAY (only set for block devices) */
-#define FMODE_NDELAY ((__force fmode_t)0x40)
-/* File is opened with O_EXCL (only set for block devices) */
-#define FMODE_EXCL ((__force fmode_t)0x80)
-/* File is opened using open(.., 3, ..) and is writeable only for ioctls
- (specialy hack for floppy.c) */
-#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
-/* 32bit hashes as llseek() offset (for directories) */
-#define FMODE_32BITHASH ((__force fmode_t)0x200)
-/* 64bit hashes as llseek() offset (for directories) */
-#define FMODE_64BITHASH ((__force fmode_t)0x400)
-#define FMODE_BUFFERED ((__force fmode_t)0x800)
+typedef unsigned int __bitwise blk_mode_t;
+
+/* open for reading */
+#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
+/* open for writing */
+#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
+/* open exclusively (vs other exclusive openers */
+#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
+/* opened with O_NDELAY */
+#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
+/* open for "writes" only for ioctls (specialy hack for floppy.c) */
+#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
+
+#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5))
struct inode {
unsigned long i_ino;
@@ -93,9 +85,14 @@ int blkdev_issue_zeroout(struct block_device *, sector_t, sector_t, gfp_t, unsig
unsigned bdev_logical_block_size(struct block_device *bdev);
sector_t get_capacity(struct gendisk *disk);
-void blkdev_put(struct block_device *bdev, fmode_t mode);
+struct blk_holder_ops {
+ void (*mark_dead)(struct block_device *bdev);
+};
+
+void blkdev_put(struct block_device *bdev, void *holder);
void bdput(struct block_device *bdev);
-struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder);
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hop);
int lookup_bdev(const char *path, dev_t *);
struct super_block {
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 39df1f1..b9486db 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -65,6 +65,7 @@
#define unreachable() __builtin_unreachable()
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#define fallthrough __attribute__((__fallthrough__))
+#define __noreturn __attribute__((__noreturn__))
#define ___PASTE(a,b) a##b
#define __PASTE(a,b) ___PASTE(a,b)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ef03253..ec5f478 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -12,7 +12,7 @@
#define rcu_access_pointer(p) READ_ONCE(p)
#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */
-#define kvfree_rcu(ptr) kfree(ptr) /* XXX */
+#define kvfree_rcu_mightsleep(ptr) kfree(ptr) /* XXX */
#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index b1a4888..f380989 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -1,18 +1,71 @@
// SPDX-License-Identifier: GPL-2.0
-#ifdef CONFIG_BCACHEFS_POSIX_ACL
#include "bcachefs.h"
-#include <linux/fs.h>
+#include "acl.h"
+#include "xattr.h"
+
#include <linux/posix_acl.h>
+
+static const char * const acl_types[] = {
+ [ACL_USER_OBJ] = "user_obj",
+ [ACL_USER] = "user",
+ [ACL_GROUP_OBJ] = "group_obj",
+ [ACL_GROUP] = "group",
+ [ACL_MASK] = "mask",
+ [ACL_OTHER] = "other",
+ NULL,
+};
+
+void bch2_acl_to_text(struct printbuf *out, const void *value, size_t size)
+{
+ const void *p, *end = value + size;
+
+ if (!value ||
+ size < sizeof(bch_acl_header) ||
+ ((bch_acl_header *)value)->a_version != cpu_to_le32(BCH_ACL_VERSION))
+ return;
+
+ p = value + sizeof(bch_acl_header);
+ while (p < end) {
+ const bch_acl_entry *in = p;
+ unsigned tag = le16_to_cpu(in->e_tag);
+
+ prt_str(out, acl_types[tag]);
+
+ switch (tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ p += sizeof(bch_acl_entry_short);
+ break;
+ case ACL_USER:
+ prt_printf(out, " uid %u", le32_to_cpu(in->e_id));
+ p += sizeof(bch_acl_entry);
+ break;
+ case ACL_GROUP:
+ prt_printf(out, " gid %u", le32_to_cpu(in->e_id));
+ p += sizeof(bch_acl_entry);
+ break;
+ }
+
+ prt_printf(out, " %o", le16_to_cpu(in->e_perm));
+
+ if (p != end)
+ prt_char(out, ' ');
+ }
+}
+
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+
+#include "fs.h"
+
+#include <linux/fs.h>
#include <linux/posix_acl_xattr.h>
#include <linux/sched.h>
#include <linux/slab.h>
-#include "acl.h"
-#include "fs.h"
-#include "xattr.h"
-
static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long)
{
return sizeof(bch_acl_header) +
@@ -226,18 +279,16 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
-
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc,
+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
&hash, inode_inum(inode), &search, 0);
if (ret) {
if (!bch2_err_matches(ret, ENOENT))
@@ -253,7 +304,7 @@ retry:
}
xattr = bkey_s_c_to_xattr(k);
- acl = bch2_acl_from_disk(&trans, xattr_val(xattr.v),
+ acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
if (!IS_ERR(acl))
@@ -262,8 +313,8 @@ out:
if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return acl;
}
@@ -303,7 +354,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter inode_iter = { NULL };
struct bch_inode_unpacked inode_u;
struct posix_acl *acl;
@@ -311,12 +362,11 @@ int bch2_set_acl(struct mnt_idmap *idmap,
int ret;
mutex_lock(&inode->ei_update_lock);
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
acl = _acl;
- ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT);
if (ret)
goto btree_err;
@@ -329,30 +379,30 @@ retry:
goto btree_err;
}
- ret = bch2_set_acl_trans(&trans, inode_inum(inode), &inode_u, acl, type);
+ ret = bch2_set_acl_trans(trans, inode_inum(inode), &inode_u, acl, type);
if (ret)
goto btree_err;
inode_u.bi_ctime = bch2_current_time(c);
inode_u.bi_mode = mode;
- ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
- bch2_trans_commit(&trans, NULL, NULL, 0);
+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
btree_err:
- bch2_trans_iter_exit(&trans, &inode_iter);
+ bch2_trans_iter_exit(trans, &inode_iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
goto err;
- bch2_inode_update_after_write(&trans, inode, &inode_u,
+ bch2_inode_update_after_write(trans, inode, &inode_u,
ATTR_CTIME|ATTR_MODE);
set_cached_acl(&inode->v, type, acl);
err:
- bch2_trans_exit(&trans);
mutex_unlock(&inode->ei_update_lock);
+ bch2_trans_put(trans);
return ret;
}
@@ -367,7 +417,7 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
struct btree_iter iter;
struct bkey_s_c_xattr xattr;
struct bkey_i_xattr *new;
- struct posix_acl *acl;
+ struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
@@ -377,9 +427,10 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
return bch2_err_matches(ret, ENOENT) ? 0 : ret;
k = bch2_btree_iter_peek_slot(&iter);
- xattr = bkey_s_c_to_xattr(k);
+ ret = bkey_err(k);
if (ret)
goto err;
+ xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h
index bb21d8d..27e7eec 100644
--- a/libbcachefs/acl.h
+++ b/libbcachefs/acl.h
@@ -7,8 +7,6 @@ struct bch_hash_info;
struct bch_inode_info;
struct posix_acl;
-#ifdef CONFIG_BCACHEFS_POSIX_ACL
-
#define BCH_ACL_VERSION 0x0001
typedef struct {
@@ -26,6 +24,10 @@ typedef struct {
__le32 a_version;
} bch_acl_header;
+void bch2_acl_to_text(struct printbuf *, const void *, size_t);
+
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+
struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int);
int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 069d98a..19ef7a4 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -237,13 +237,12 @@ int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
}
int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
- enum bkey_invalid_flags flags,
- struct printbuf *err)
+ enum bkey_invalid_flags flags, struct printbuf *err)
{
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
if (alloc_v4_u64s(a.v) > bkey_val_u64s(k.k)) {
- prt_printf(err, "bad val size (%u > %lu)",
+ prt_printf(err, "bad val size (%u > %zu)",
alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
return -BCH_ERR_invalid_bkey;
}
@@ -527,7 +526,7 @@ int bch2_bucket_gens_invalid(const struct bch_fs *c, struct bkey_s_c k,
struct printbuf *err)
{
if (bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens)) {
- prt_printf(err, "bad val size (%lu != %zu)",
+ prt_printf(err, "bad val size (%zu != %zu)",
bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
return -BCH_ERR_invalid_bkey;
}
@@ -549,7 +548,7 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke
int bch2_bucket_gens_init(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_alloc_v4 a;
@@ -560,9 +559,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
u8 gen;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
/*
* Not a fsck error because this is checked/repaired by
@@ -575,10 +572,10 @@ int bch2_bucket_gens_init(struct bch_fs *c)
pos = alloc_gens_pos(iter.pos, &offset);
if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
- ret = commit_do(&trans, NULL, NULL,
+ ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
- __bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i, 0));
+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
if (ret)
break;
have_bucket_gens_key = false;
@@ -592,15 +589,15 @@ int bch2_bucket_gens_init(struct bch_fs *c)
g.v.gens[offset] = gen;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (have_bucket_gens_key && !ret)
- ret = commit_do(&trans, NULL, NULL,
+ ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
- __bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i, 0));
+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@@ -609,20 +606,19 @@ int bch2_bucket_gens_init(struct bch_fs *c)
int bch2_alloc_read(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
int ret;
down_read(&c->gc_lock);
- bch2_trans_init(&trans, c, 0, 0);
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
const struct bch_bucket_gens *g;
u64 b;
- for_each_btree_key(&trans, iter, BTREE_ID_bucket_gens, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
@@ -646,11 +642,11 @@ int bch2_alloc_read(struct bch_fs *c)
b++)
*bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
} else {
struct bch_alloc_v4 a;
- for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
/*
* Not a fsck error because this is checked/repaired by
@@ -663,10 +659,10 @@ int bch2_alloc_read(struct bch_fs *c)
*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
up_read(&c->gc_lock);
if (ret)
@@ -1201,15 +1197,15 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
}
if (need_update) {
- struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(g));
+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
- ret = PTR_ERR_OR_ZERO(k);
+ ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
- memcpy(k, &g, sizeof(g));
+ memcpy(u, &g, sizeof(g));
- ret = bch2_trans_update(trans, bucket_gens_iter, k, 0);
+ ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
if (ret)
goto err;
}
@@ -1286,7 +1282,7 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
if (!btree_id_is_extents(iter->btree_id)) {
return __bch2_check_discard_freespace_key(trans, iter);
} else {
- int ret;
+ int ret = 0;
while (!bkey_eq(iter->pos, end) &&
!(ret = btree_trans_too_many_iters(trans) ?:
@@ -1355,15 +1351,14 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
}
if (need_update) {
- struct bkey_i *k;
+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
- k = bch2_trans_kmalloc(trans, sizeof(g));
- ret = PTR_ERR_OR_ZERO(k);
+ ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto out;
- memcpy(k, &g, sizeof(g));
- ret = bch2_trans_update(trans, iter, k, 0);
+ memcpy(u, &g, sizeof(g));
+ ret = bch2_trans_update(trans, iter, u, 0);
}
out:
fsck_err:
@@ -1373,27 +1368,25 @@ fsck_err:
int bch2_check_alloc_info(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
struct bkey hole;
struct bkey_s_c k;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH);
- bch2_trans_iter_init(&trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
+ bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
BTREE_ITER_PREFETCH);
- bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
+ bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
BTREE_ITER_PREFETCH);
- bch2_trans_iter_init(&trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
+ bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_PREFETCH);
while (1) {
struct bpos next;
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
k = bch2_get_key_or_real_bucket_hole(&iter, &hole);
ret = bkey_err(k);
@@ -1406,7 +1399,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
if (k.k->type) {
next = bpos_nosnap_successor(k.k->p);
- ret = bch2_check_alloc_key(&trans,
+ ret = bch2_check_alloc_key(trans,
k, &iter,
&discard_iter,
&freespace_iter,
@@ -1416,11 +1409,11 @@ int bch2_check_alloc_info(struct bch_fs *c)
} else {
next = k.k->p;
- ret = bch2_check_alloc_hole_freespace(&trans,
+ ret = bch2_check_alloc_hole_freespace(trans,
bkey_start_pos(k.k),
&next,
&freespace_iter) ?:
- bch2_check_alloc_hole_bucket_gens(&trans,
+ bch2_check_alloc_hole_bucket_gens(trans,
bkey_start_pos(k.k),
&next,
&bucket_gens_iter);
@@ -1428,7 +1421,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
goto bkey_err;
}
- ret = bch2_trans_commit(&trans, NULL, NULL,
+ ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret)
@@ -1441,29 +1434,29 @@ bkey_err:
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &bucket_gens_iter);
- bch2_trans_iter_exit(&trans, &freespace_iter);
- bch2_trans_iter_exit(&trans, &discard_iter);
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &bucket_gens_iter);
+ bch2_trans_iter_exit(trans, &freespace_iter);
+ bch2_trans_iter_exit(trans, &discard_iter);
+ bch2_trans_iter_exit(trans, &iter);
if (ret < 0)
goto err;
- ret = for_each_btree_key2(&trans, iter,
+ ret = for_each_btree_key2(trans, iter,
BTREE_ID_need_discard, POS_MIN,
BTREE_ITER_PREFETCH, k,
- bch2_check_discard_freespace_key(&trans, &iter, k.k->p)) ?:
- for_each_btree_key2(&trans, iter,
+ bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
+ for_each_btree_key2(trans, iter,
BTREE_ID_freespace, POS_MIN,
BTREE_ITER_PREFETCH, k,
- bch2_check_discard_freespace_key(&trans, &iter, k.k->p)) ?:
- for_each_btree_key_commit(&trans, iter,
+ bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
+ for_each_btree_key_commit(trans, iter,
BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
- bch2_check_bucket_gens_key(&trans, &iter, k));
+ bch2_check_bucket_gens_key(trans, &iter, k));
err:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -1549,10 +1542,10 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
int ret = 0;
ret = bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
- bch2_check_alloc_to_lru_ref(&trans, &iter)));
+ bch2_check_alloc_to_lru_ref(trans, &iter)));
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -1677,29 +1670,25 @@ out:
static void bch2_do_discards_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
struct bpos discard_pos_done = POS_MAX;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
/*
* We're doing the commit in bch2_discard_one_bucket instead of using
* for_each_btree_key_commit() so that we can increment counters after
* successful commit:
*/
- ret = for_each_btree_key2(&trans, iter,
- BTREE_ID_need_discard, POS_MIN, 0, k,
- bch2_discard_one_bucket(&trans, &iter, &discard_pos_done,
- &seen,
- &open,
- &need_journal_commit,
- &discarded));
-
- bch2_trans_exit(&trans);
+ ret = bch2_trans_run(c,
+ for_each_btree_key2(trans, iter,
+ BTREE_ID_need_discard, POS_MIN, 0, k,
+ bch2_discard_one_bucket(trans, &iter, &discard_pos_done,
+ &seen,
+ &open,
+ &need_journal_commit,
+ &discarded)));
if (need_journal_commit * 2 > seen)
bch2_journal_flush_async(&c->journal, NULL);
@@ -1805,15 +1794,13 @@ static void bch2_do_invalidates_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
struct bch_dev *ca;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
unsigned i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
- ret = bch2_btree_write_buffer_flush(&trans);
+ ret = bch2_btree_write_buffer_flush(trans);
if (ret)
goto err;
@@ -1821,11 +1808,11 @@ static void bch2_do_invalidates_work(struct work_struct *work)
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
- ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_lru,
+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
lru_pos(ca->dev_idx, 0, 0),
lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
BTREE_ITER_INTENT, k,
- invalidate_one_bucket(&trans, &iter, k, &nr_to_invalidate));
+ invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
if (ret < 0) {
percpu_ref_put(&ca->ref);
@@ -1833,7 +1820,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
}
}
err:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
@@ -1847,7 +1834,7 @@ void bch2_do_invalidates(struct bch_fs *c)
static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
unsigned long *last_updated)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bkey hole;
@@ -1855,9 +1842,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
struct bch_member *m;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
BTREE_ITER_PREFETCH);
/*
@@ -1871,7 +1856,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
*last_updated = jiffies;
}
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
if (bkey_ge(iter.pos, end)) {
ret = 0;
@@ -1891,8 +1876,8 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
- ret = bch2_bucket_do_index(&trans, k, a, true) ?:
- bch2_trans_commit(&trans, NULL, NULL,
+ ret = bch2_bucket_do_index(trans, k, a, true) ?:
+ bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL);
if (ret)
@@ -1902,7 +1887,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
} else {
struct bkey_i *freespace;
- freespace = bch2_trans_kmalloc(&trans, sizeof(*freespace));
+ freespace = bch2_trans_kmalloc(trans, sizeof(*freespace));
ret = PTR_ERR_OR_ZERO(freespace);
if (ret)
goto bkey_err;
@@ -1912,8 +1897,8 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
freespace->k.p = k.k->p;
freespace->k.size = k.k->size;
- ret = __bch2_btree_insert(&trans, BTREE_ID_freespace, freespace, 0) ?:
- bch2_trans_commit(&trans, NULL, NULL,
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL);
if (ret)
@@ -1928,11 +1913,11 @@ bkey_err:
break;
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
if (ret < 0) {
- bch_err(ca, "error initializing free space: %s", bch2_err_str(ret));
+ bch_err_msg(ca, ret, "initializing free space");
return ret;
}
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index e02749d..3bc4abd 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -25,7 +25,7 @@
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
-#include "io.h"
+#include "io_write.h"
#include "journal.h"
#include "movinggc.h"
#include "nocow_locking.h"
@@ -502,9 +502,14 @@ again:
}
/**
- * bch_bucket_alloc - allocate a single bucket from a specific device
+ * bch2_bucket_alloc_trans - allocate a single bucket from a specific device
+ * @trans: transaction object
+ * @ca: device to allocate from
+ * @watermark: how important is this allocation?
+ * @cl: if not NULL, closure to be used to wait if buckets not available
+ * @usage: for secondarily also returning the current device usage
*
- * Returns index of bucket on success, 0 on failure
+ * Returns: an open_bucket on success, or an ERR_PTR() on failure.
*/
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca,
@@ -597,7 +602,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
struct open_bucket *ob;
bch2_trans_do(c, NULL, NULL, 0,
- PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, watermark,
+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
cl, &usage)));
return ob;
}
@@ -775,7 +780,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct dev_alloc_list devs_sorted;
struct ec_stripe_head *h;
struct open_bucket *ob;
- struct bch_dev *ca;
unsigned i, ec_idx;
int ret = 0;
@@ -805,8 +809,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
}
goto out_put_head;
got_bucket:
- ca = bch_dev_bkey_exists(c, ob->dev);
-
ob->ec_idx = ec_idx;
ob->ec = h->s;
ec_stripe_new_get(h->s, STRIPE_REF_io);
@@ -1032,10 +1034,13 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
/**
* should_drop_bucket - check if this is open_bucket should go away
+ * @ob: open_bucket to predicate on
+ * @c: filesystem handle
* @ca: if set, we're killing buckets for a particular device
* @ec: if true, we're shutting down erasure coding and killing all ec
* open_buckets
* otherwise, return true
+ * Returns: true if we should kill this open_bucket
*
* We're killing open_buckets because we're shutting down a device, erasure
* coding, or the entire filesystem - check if this open_bucket matches:
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index 8747c5e..cc85615 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -351,20 +351,17 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter = { NULL };
- struct bch_dev *ca;
struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
- "backpointer for mising device:\n%s",
+ "backpointer for missing device:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, bp_iter, 0);
goto out;
}
- ca = bch_dev_bkey_exists(c, k.k->p.inode);
-
alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
bp_pos_to_bucket(c, k.k->p), 0);
ret = bkey_err(alloc_k);
@@ -393,10 +390,10 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
int ret;
ret = bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter,
+ for_each_btree_key_commit(trans, iter,
BTREE_ID_backpointers, POS_MIN, 0, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- bch2_check_btree_backpointer(&trans, &iter, k)));
+ bch2_check_btree_backpointer(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -629,7 +626,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
enum btree_id btree_id;
- struct bpos_level last_flushed = { UINT_MAX };
+ struct bpos_level last_flushed = { UINT_MAX, POS_MIN };
int ret = 0;
for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
@@ -706,7 +703,7 @@ static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
--btree_nodes;
if (!btree_nodes) {
- *end = alloc_k.k->p;
+ *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
break;
}
@@ -726,13 +723,12 @@ static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct bpos start = POS_MIN, end;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
while (1) {
- ret = bch2_get_alloc_in_memory_pos(&trans, start, &end);
+ ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
if (ret)
break;
@@ -752,13 +748,13 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
printbuf_exit(&buf);
}
- ret = bch2_check_extents_to_backpointers_pass(&trans, start, end);
+ ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
if (ret || bpos_eq(end, SPOS_MAX))
break;
start = bpos_successor(end);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@@ -827,13 +823,12 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
while (1) {
- ret = bch2_get_btree_in_memory_pos(&trans,
+ ret = bch2_get_btree_in_memory_pos(trans,
(1U << BTREE_ID_extents)|
(1U << BTREE_ID_reflink),
~0,
@@ -859,13 +854,13 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
printbuf_exit(&buf);
}
- ret = bch2_check_backpointers_to_extents_pass(&trans, start, end);
+ ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
if (ret || !bbpos_cmp(end, BBPOS_MAX))
break;
start = bbpos_successor(end);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 30b3d7b..9ae8225 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -454,6 +454,7 @@ enum gc_phase {
GC_PHASE_BTREE_bucket_gens,
GC_PHASE_BTREE_snapshot_trees,
GC_PHASE_BTREE_deleted_inodes,
+ GC_PHASE_BTREE_logged_ops,
GC_PHASE_PENDING_DELETE,
};
@@ -626,8 +627,8 @@ struct journal_keys {
size_t size;
};
-struct btree_path_buf {
- struct btree_path *path;
+struct btree_trans_buf {
+ struct btree_trans *trans;
};
#define REPLICAS_DELTA_LIST_MAX (1U << 16)
@@ -786,9 +787,9 @@ struct bch_fs {
/* btree_iter.c: */
struct seqmutex btree_trans_lock;
struct list_head btree_trans_list;
- mempool_t btree_paths_pool;
+ mempool_t btree_trans_pool;
mempool_t btree_trans_mem_pool;
- struct btree_path_buf __percpu *btree_paths_bufs;
+ struct btree_trans_buf __percpu *btree_trans_bufs;
struct srcu_struct btree_trans_barrier;
bool btree_trans_barrier_initialized;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index f17238b..f0d1304 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -83,8 +83,8 @@ typedef uuid_t __uuid_t;
#endif
#define BITMASK(name, type, field, offset, end) \
-static const unsigned name##_OFFSET = offset; \
-static const unsigned name##_BITS = (end - offset); \
+static const __maybe_unused unsigned name##_OFFSET = offset; \
+static const __maybe_unused unsigned name##_BITS = (end - offset); \
\
static inline __u64 name(const type *k) \
{ \
@@ -98,9 +98,9 @@ static inline void SET_##name(type *k, __u64 v) \
}
#define LE_BITMASK(_bits, name, type, field, offset, end) \
-static const unsigned name##_OFFSET = offset; \
-static const unsigned name##_BITS = (end - offset); \
-static const __u##_bits name##_MAX = (1ULL << (end - offset)) - 1; \
+static const __maybe_unused unsigned name##_OFFSET = offset; \
+static const __maybe_unused unsigned name##_BITS = (end - offset); \
+static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\
\
static inline __u64 name(const type *k) \
{ \
@@ -370,7 +370,9 @@ static inline void bkey_init(struct bkey *k)
x(backpointer, 28) \
x(inode_v3, 29) \
x(bucket_gens, 30) \
- x(snapshot_tree, 31)
+ x(snapshot_tree, 31) \
+ x(logged_op_truncate, 32) \
+ x(logged_op_finsert, 33)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@@ -723,7 +725,7 @@ struct bch_inode {
__le64 bi_hash_seed;
__le32 bi_flags;
__le16 bi_mode;
- __u8 fields[0];
+ __u8 fields[];
} __packed __aligned(8);
struct bch_inode_v2 {
@@ -733,7 +735,7 @@ struct bch_inode_v2 {
__le64 bi_hash_seed;
__le64 bi_flags;
__le16 bi_mode;
- __u8 fields[0];
+ __u8 fields[];
} __packed __aligned(8);
struct bch_inode_v3 {
@@ -745,7 +747,7 @@ struct bch_inode_v3 {
__le64 bi_sectors;
__le64 bi_size;
__le64 bi_version;
- __u8 fields[0];
+ __u8 fields[];
} __packed __aligned(8);
#define INODEv3_FIELDS_START_INITIAL 6
@@ -847,8 +849,8 @@ enum {
__BCH_INODE_NODUMP = 3,
__BCH_INODE_NOATIME = 4,
- __BCH_INODE_I_SIZE_DIRTY = 5,
- __BCH_INODE_I_SECTORS_DIRTY = 6,
+ __BCH_INODE_I_SIZE_DIRTY = 5, /* obsolete */
+ __BCH_INODE_I_SECTORS_DIRTY = 6, /* obsolete */
__BCH_INODE_UNLINKED = 7,
__BCH_INODE_BACKPTR_UNTRUSTED = 8,
@@ -1097,20 +1099,20 @@ struct bch_reflink_v {
struct bch_val v;
__le64 refcount;
union bch_extent_entry start[0];
- __u64 _data[0];
+ __u64 _data[];
} __packed __aligned(8);
struct bch_indirect_inline_data {
struct bch_val v;
__le64 refcount;
- u8 data[0];
+ u8 data[];
};
/* Inline data */
struct bch_inline_data {
struct bch_val v;
- u8 data[0];
+ u8 data[];
};
/* Subvolumes: */
@@ -1183,6 +1185,33 @@ struct bch_lru {
#define LRU_ID_STRIPES (1U << 16)
+/* Logged operations btree: */
+
+struct bch_logged_op_truncate {
+ struct bch_val v;
+ __le32 subvol;
+ __le32 pad;
+ __le64 inum;
+ __le64 new_i_size;
+};
+
+enum logged_op_finsert_state {
+ LOGGED_OP_FINSERT_start,
+ LOGGED_OP_FINSERT_shift_extents,
+ LOGGED_OP_FINSERT_finish,
+};
+
+struct bch_logged_op_finsert {
+ struct bch_val v;
+ __u8 state;
+ __u8 pad[3];
+ __le32 subvol;
+ __le64 inum;
+ __le64 dst_offset;
+ __le64 src_offset;
+ __le64 pos;
+};
+
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@@ -1223,7 +1252,7 @@ enum bch_sb_field_type {
struct bch_sb_field_journal {
struct bch_sb_field field;
- __le64 buckets[0];
+ __le64 buckets[];
};
struct bch_sb_field_journal_v2 {
@@ -1232,7 +1261,7 @@ struct bch_sb_field_journal_v2 {
struct bch_sb_field_journal_v2_entry {
__le64 start;
__le64 nr;
- } d[0];
+ } d[];
};
/* BCH_SB_FIELD_members: */
@@ -1279,7 +1308,7 @@ enum bch_member_state {
struct bch_sb_field_members {
struct bch_sb_field field;
- struct bch_member members[0];
+ struct bch_member members[];
};
/* BCH_SB_FIELD_crypt: */
@@ -1377,19 +1406,19 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
struct bch_replicas_entry_v0 {
__u8 data_type;
__u8 nr_devs;
- __u8 devs[0];
+ __u8 devs[];
} __packed;
struct bch_sb_field_replicas_v0 {
struct bch_sb_field field;
- struct bch_replicas_entry_v0 entries[0];
+ struct bch_replicas_entry_v0 entries[];
} __packed __aligned(8);
struct bch_replicas_entry {
__u8 data_type;
__u8 nr_devs;
__u8 nr_required;
- __u8 devs[0];
+ __u8 devs[];
} __packed;
#define replicas_entry_bytes(_i) \
@@ -1397,7 +1426,7 @@ struct bch_replicas_entry {
struct bch_sb_field_replicas {
struct bch_sb_field field;
- struct bch_replicas_entry entries[0];
+ struct bch_replicas_entry entries[];
} __packed __aligned(8);
/* BCH_SB_FIELD_quota: */
@@ -1432,7 +1461,7 @@ LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
struct bch_sb_field_disk_groups {
struct bch_sb_field field;
- struct bch_disk_group entries[0];
+ struct bch_disk_group entries[];
} __packed __aligned(8);
/* BCH_SB_FIELD_counters */
@@ -1525,7 +1554,7 @@ enum bch_persistent_counters {
struct bch_sb_field_counters {
struct bch_sb_field field;
- __le64 d[0];
+ __le64 d[];
};
/*
@@ -1539,10 +1568,8 @@ struct jset_entry {
__u8 type; /* designates what this jset holds */
__u8 pad[3];
- union {
- struct bkey_i start[0];
- __u64 _data[0];
- };
+ struct bkey_i start[0];
+ __u64 _data[];
};
struct bch_sb_field_clean {
@@ -1553,10 +1580,8 @@ struct bch_sb_field_clean {
__le16 _write_clock;
__le64 journal_seq;
- union {
- struct jset_entry start[0];
- __u64 _data[0];
- };
+ struct jset_entry start[0];
+ __u64 _data[];
};
struct journal_seq_blacklist_entry {
@@ -1567,10 +1592,8 @@ struct journal_seq_blacklist_entry {
struct bch_sb_field_journal_seq_blacklist {
struct bch_sb_field field;
- union {
- struct journal_seq_blacklist_entry start[0];
- __u64 _data[0];
- };
+ struct journal_seq_blacklist_entry start[0];
+ __u64 _data[];
};
/* Superblock: */
@@ -1645,7 +1668,8 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_max
};
-static const unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor;
+static const __maybe_unused
+unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor;
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
@@ -1706,10 +1730,8 @@ struct bch_sb {
struct bch_sb_layout layout;
- union {
- struct bch_sb_field start[0];
- __le64 _data[0];
- };
+ struct bch_sb_field start[0];
+ __le64 _data[];
} __packed __aligned(8);
/*
@@ -1954,7 +1976,7 @@ enum bch_csum_type {
BCH_CSUM_NR
};
-static const unsigned bch_crc_bytes[] = {
+static const __maybe_unused unsigned bch_crc_bytes[] = {
[BCH_CSUM_none] = 0,
[BCH_CSUM_crc32c_nonzero] = 4,
[BCH_CSUM_crc32c] = 4,
@@ -2186,10 +2208,8 @@ struct jset {
__le64 last_seq;
- union {
- struct jset_entry start[0];
- __u64 _data[0];
- };
+ struct jset_entry start[0];
+ __u64 _data[];
} __packed __aligned(8);
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
@@ -2259,7 +2279,10 @@ enum btree_id_flags {
x(snapshot_trees, 15, 0, \
BIT_ULL(KEY_TYPE_snapshot_tree)) \
x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \
- BIT_ULL(KEY_TYPE_set))
+ BIT_ULL(KEY_TYPE_set)) \
+ x(logged_ops, 17, 0, \
+ BIT_ULL(KEY_TYPE_logged_op_truncate)| \
+ BIT_ULL(KEY_TYPE_logged_op_finsert))
enum btree_id {
#define x(name, nr, ...) BTREE_ID_##name = nr,
@@ -2294,10 +2317,8 @@ struct bset {
__le16 version;
__le16 u64s; /* count of d[] in u64s */
- union {
- struct bkey_packed start[0];
- __u64 _data[0];
- };
+ struct bkey_packed start[0];
+ __u64 _data[];
} __packed __aligned(8);
LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c
index 0a5bfe6..abdb055 100644
--- a/libbcachefs/bkey.c
+++ b/libbcachefs/bkey.c
@@ -127,7 +127,7 @@ static void pack_state_finish(struct pack_state *state,
struct bkey_packed *k)
{
EBUG_ON(state->p < k->_data);
- EBUG_ON(state->p >= k->_data + state->format->key_u64s);
+ EBUG_ON(state->p >= (u64 *) k->_data + state->format->key_u64s);
*state->p = state->w;
}
@@ -308,9 +308,14 @@ struct bpos __bkey_unpack_pos(const struct bkey_format *format,
/**
* bch2_bkey_pack_key -- pack just the key, not the value
+ * @out: packed result
+ * @in: key to pack
+ * @format: format of packed result
+ *
+ * Returns: true on success, false on failure
*/
bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
- const struct bkey_format *format)
+ const struct bkey_format *format)
{
struct pack_state state = pack_state_init(format, out);
u64 *w = out->_data;
@@ -336,9 +341,12 @@ bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
/**
* bch2_bkey_unpack -- unpack the key and the value
+ * @b: btree node of @src key (for packed format)
+ * @dst: unpacked result
+ * @src: packed input
*/
void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
- const struct bkey_packed *src)
+ const struct bkey_packed *src)
{
__bkey_unpack_key(b, &dst->k, src);
@@ -349,19 +357,24 @@ void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
/**
* bch2_bkey_pack -- pack the key and the value
+ * @dst: packed result
+ * @src: unpacked input
+ * @format: format of packed result
+ *
+ * Returns: true on success, false on failure
*/
-bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in,
- const struct bkey_format *format)
+bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src,
+ const struct bkey_format *format)
{
struct bkey_packed tmp;
- if (!bch2_bkey_pack_key(&tmp, &in->k, format))
+ if (!bch2_bkey_pack_key(&tmp, &src->k, format))
return false;
- memmove_u64s((u64 *) out + format->key_u64s,
- &in->v,
- bkey_val_u64s(&in->k));
- memcpy_u64s_small(out, &tmp, format->key_u64s);
+ memmove_u64s((u64 *) dst + format->key_u64s,
+ &src->v,
+ bkey_val_u64s(&src->k));
+ memcpy_u64s_small(dst, &tmp, format->key_u64s);
return true;
}
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index 51969a4..5184502 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -52,7 +52,7 @@ struct bkey_s {
static inline struct bkey_i *bkey_next(struct bkey_i *k)
{
- return (struct bkey_i *) (k->_data + k->k.u64s);
+ return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
}
#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s)
@@ -397,7 +397,7 @@ static inline void set_bkeyp_val_u64s(const struct bkey_format *format,
}
#define bkeyp_val(_format, _k) \
- ((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k)))
+ ((struct bch_val *) ((u64 *) (_k)->_data + bkeyp_key_u64s(_format, _k)))
extern const struct bkey_format bch2_bkey_format_current;
@@ -732,7 +732,7 @@ static inline unsigned high_word_offset(const struct bkey_format *f)
#error edit for your odd byteorder.
#endif
-#define high_word(f, k) ((k)->_data + high_word_offset(f))
+#define high_word(f, k) ((u64 *) (k)->_data + high_word_offset(f))
#define next_word(p) nth_word(p, 1)
#define prev_word(p) nth_word(p, -1)
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 6547142..be9f012 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -10,6 +10,7 @@
#include "error.h"
#include "extents.h"
#include "inode.h"
+#include "io_misc.h"
#include "lru.h"
#include "quota.h"
#include "reflink.h"
@@ -25,7 +26,7 @@ const char * const bch2_bkey_types[] = {
};
static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
- unsigned flags, struct printbuf *err)
+ enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
}
@@ -39,7 +40,7 @@ static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
})
static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
- unsigned flags, struct printbuf *err)
+ enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_val_bytes(k.k)) {
prt_printf(err, "incorrect value size (%zu != 0)",
@@ -55,7 +56,7 @@ static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
})
static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
- unsigned flags, struct printbuf *err)
+ enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
}
@@ -70,7 +71,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
})
static int key_type_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k,
- unsigned flags, struct printbuf *err)
+ enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
}
@@ -91,7 +92,7 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
})
static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k,
- unsigned flags, struct printbuf *err)
+ enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_val_bytes(k.k)) {
prt_printf(err, "incorrect value size (%zu != %zu)",
@@ -368,7 +369,6 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
{
const struct bkey_ops *ops;
struct bkey uk;
- struct bkey_s u;
unsigned nr_compat = 5;
int i;
@@ -433,7 +433,9 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
}
break;
- case 4:
+ case 4: {
+ struct bkey_s u;
+
if (!bkey_packed(k)) {
u = bkey_i_to_s(packed_to_bkey(k));
} else {
@@ -450,6 +452,7 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
if (ops->compat)
ops->compat(btree_id, version, big_endian, write, u);
break;
+ }
default:
BUG();
}
diff --git a/libbcachefs/bkey_sort.h b/libbcachefs/bkey_sort.h
index 79cf11d..7c0f0b1 100644
--- a/libbcachefs/bkey_sort.h
+++ b/libbcachefs/bkey_sort.h
@@ -9,14 +9,24 @@ struct sort_iter {
struct sort_iter_set {
struct bkey_packed *k, *end;
- } data[MAX_BSETS + 1];
+ } data[];
};
-static inline void sort_iter_init(struct sort_iter *iter, struct btree *b)
+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b, unsigned size)
{
iter->b = b;
iter->used = 0;
- iter->size = ARRAY_SIZE(iter->data);
+ iter->size = size;
+}
+
+struct sort_iter_stack {
+ struct sort_iter iter;
+ struct sort_iter_set sets[MAX_BSETS + 1];
+};
+
+static inline void sort_iter_stack_init(struct sort_iter_stack *iter, struct btree *b)
+{
+ sort_iter_init(&iter->iter, b, ARRAY_SIZE(iter->sets));
}
static inline void sort_iter_add(struct sort_iter *iter,
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index bcdf28f..bb73ba9 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -172,10 +172,10 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
printk(KERN_ERR "iter was:");
btree_node_iter_for_each(_iter, set) {
- struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
- struct bset_tree *t = bch2_bkey_to_bset(b, k);
+ struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k);
+ struct bset_tree *t = bch2_bkey_to_bset(b, k2);
printk(" [%zi %zi]", t - b->set,
- k->_data - bset(b, t)->_data);
+ k2->_data - bset(b, t)->_data);
}
panic("\n");
}
@@ -232,7 +232,7 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
{
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
- struct bkey_packed *next = (void *) (where->_data + clobber_u64s);
+ struct bkey_packed *next = (void *) ((u64 *) where->_data + clobber_u64s);
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
#if 0
@@ -300,7 +300,8 @@ static unsigned bkey_float_byte_offset(unsigned idx)
}
struct ro_aux_tree {
- struct bkey_float f[0];
+ u8 nothing[0];
+ struct bkey_float f[];
};
struct rw_aux_tree {
@@ -476,7 +477,7 @@ static struct bkey_packed *tree_to_prev_bkey(const struct btree *b,
{
unsigned prev_u64s = ro_aux_tree_prev(b, t)[j];
- return (void *) (tree_to_bkey(b, t, j)->_data - prev_u64s);
+ return (void *) ((u64 *) tree_to_bkey(b, t, j)->_data - prev_u64s);
}
static struct rw_aux_tree *rw_aux_tree(const struct btree *b,
@@ -1010,8 +1011,8 @@ void bch2_bset_insert(struct btree *b,
btree_keys_account_key_add(&b->nr, t - b->set, src);
if (src->u64s != clobber_u64s) {
- u64 *src_p = where->_data + clobber_u64s;
- u64 *dst_p = where->_data + src->u64s;
+ u64 *src_p = (u64 *) where->_data + clobber_u64s;
+ u64 *dst_p = (u64 *) where->_data + src->u64s;
EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) <
(int) clobber_u64s - src->u64s);
@@ -1037,7 +1038,7 @@ void bch2_bset_delete(struct btree *b,
unsigned clobber_u64s)
{
struct bset_tree *t = bset_tree_last(b);
- u64 *src_p = where->_data + clobber_u64s;
+ u64 *src_p = (u64 *) where->_data + clobber_u64s;
u64 *dst_p = where->_data;
bch2_bset_verify_rw_aux_tree(b, t);
@@ -1188,7 +1189,7 @@ struct bkey_packed *__bch2_bset_search(struct btree *b,
case BSET_RO_AUX_TREE:
return bset_search_tree(b, t, search, lossy_packed_search);
default:
- unreachable();
+ BUG();
}
}
@@ -1268,9 +1269,13 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
}
/**
- * bch_btree_node_iter_init - initialize a btree node iterator, starting from a
+ * bch2_btree_node_iter_init - initialize a btree node iterator, starting from a
* given position
*
+ * @iter: iterator to initialize
+ * @b: btree node to search
+ * @search: search key
+ *
* Main entry point to the lookup code for individual btree nodes:
*
* NOTE:
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index a8283fd..7c6769c 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -795,7 +795,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
six_unlock_intent(&b->c.lock);
/* Unlock before doing IO: */
- if (trans && sync)
+ if (path && sync)
bch2_trans_unlock_noassert(trans);
bch2_btree_node_read(c, b, sync);
@@ -934,7 +934,7 @@ retry:
}
if (unlikely(need_relock)) {
- int ret = bch2_trans_relock(trans) ?:
+ ret = bch2_trans_relock(trans) ?:
bch2_btree_path_relock_intent(trans, path);
if (ret) {
six_unlock_type(&b->c.lock, lock_type);
@@ -965,11 +965,20 @@ retry:
}
/**
- * bch_btree_node_get - find a btree node in the cache and lock it, reading it
+ * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*
+ * @trans: btree transaction object
+ * @path: btree_path being traversed
+ * @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
+ * @level: level of btree node being looked up (0 == leaf node)
+ * @lock_type: SIX_LOCK_read or SIX_LOCK_intent
+ * @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
+ *
* The btree node will have either a read or a write lock held, depending on
* the @write parameter.
+ *
+ * Returns: btree node or ERR_PTR()
*/
struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
const struct bkey_i *k, unsigned level,
@@ -1016,28 +1025,8 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
}
if (unlikely(btree_node_read_in_flight(b))) {
- u32 seq = six_lock_seq(&b->c.lock);
-
six_unlock_type(&b->c.lock, lock_type);
- bch2_trans_unlock(trans);
-
- bch2_btree_node_wait_on_read(b);
-
- /*
- * should_be_locked is not set on this path yet, so we need to
- * relock it specifically:
- */
- if (trans) {
- int ret = bch2_trans_relock(trans) ?:
- bch2_btree_path_relock_intent(trans, path);
- if (ret) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(ret);
- }
- }
-
- if (!six_relock_type(&b->c.lock, lock_type, seq))
- return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
+ return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
}
prefetch(b->aux_data);
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 83dcd9e..97fbd83 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -529,13 +529,11 @@ fsck_err:
int bch2_check_topology(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree *b;
unsigned i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
@@ -546,8 +544,8 @@ int bch2_check_topology(struct bch_fs *c)
if (btree_node_fake(b))
continue;
- btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
- ret = bch2_btree_repair_topology_recurse(&trans, b);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+ ret = bch2_btree_repair_topology_recurse(trans, b);
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
@@ -556,7 +554,7 @@ int bch2_check_topology(struct bch_fs *c)
}
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -566,8 +564,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
struct bkey_s_c *k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
- const union bch_extent_entry *entry;
+ struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(*k);
+ const union bch_extent_entry *entry_c;
struct extent_ptr_decoded p = { 0 };
bool do_update = false;
struct printbuf buf = PRINTBUF;
@@ -577,10 +575,10 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
* XXX
* use check_bucket_ref here
*/
- bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
+ bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
if (!g->gen_valid &&
(c->opts.reconstruct_alloc ||
@@ -1068,15 +1066,13 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
enum btree_id ids[BTREE_ID_NR];
unsigned i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
if (initial)
- trans.is_initial_gc = true;
+ trans->is_initial_gc = true;
for (i = 0; i < BTREE_ID_NR; i++)
ids[i] = i;
@@ -1084,22 +1080,22 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
for (i = 0; i < BTREE_ID_NR && !ret; i++)
ret = initial
- ? bch2_gc_btree_init(&trans, ids[i], metadata_only)
- : bch2_gc_btree(&trans, ids[i], initial, metadata_only);
+ ? bch2_gc_btree_init(trans, ids[i], metadata_only)
+ : bch2_gc_btree(trans, ids[i], initial, metadata_only);
for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) {
if (!bch2_btree_id_root(c, i)->alive)
continue;
ret = initial
- ? bch2_gc_btree_init(&trans, i, metadata_only)
- : bch2_gc_btree(&trans, i, initial, metadata_only);
+ ? bch2_gc_btree_init(trans, i, metadata_only)
+ : bch2_gc_btree(trans, i, initial, metadata_only);
}
if (ret < 0)
bch_err_fn(c, ret);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -1220,14 +1216,6 @@ static int bch2_gc_done(struct bch_fs *c,
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f))) \
dst->_f = src->_f
-#define copy_stripe_field(_f, _msg, ...) \
- if (dst->_f != src->_f && \
- (!verify || \
- fsck_err(c, "stripe %zu has wrong "_msg \
- ": got %u, should be %u", \
- iter.pos, ##__VA_ARGS__, \
- dst->_f, src->_f))) \
- dst->_f = src->_f
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
#define copy_fs_field(_f, _msg, ...) \
@@ -1249,7 +1237,7 @@ static int bch2_gc_done(struct bch_fs *c,
copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
}
- };
+ }
{
unsigned nr = fs_usage_u64s(c);
@@ -1469,37 +1457,35 @@ fsck_err:
static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
unsigned i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
for_each_member_device(ca, c, i) {
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW,
- bch2_alloc_write_key(&trans, &iter, k, metadata_only));
+ bch2_alloc_write_key(trans, &iter, k, metadata_only));
if (ret < 0) {
- bch_err(c, "error writing alloc info: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
percpu_ref_put(&ca->ref);
break;
}
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret < 0 ? ret : 0;
}
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
{
struct bch_dev *ca;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bucket *g;
@@ -1515,17 +1501,16 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
if (!buckets) {
percpu_ref_put(&ca->ref);
bch_err(c, "error allocating ca->buckets[gc]");
- return -BCH_ERR_ENOMEM_gc_alloc_start;
+ ret = -BCH_ERR_ENOMEM_gc_alloc_start;
+ goto err;
}
buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = ca->mi.nbuckets;
rcu_assign_pointer(ca->buckets_gc, buckets);
- };
-
- bch2_trans_init(&trans, c, 0, 0);
+ }
- for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = gc_bucket(ca, k.k->p.offset);
@@ -1546,13 +1531,11 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
g->stripe_redundancy = a->stripe_redundancy;
}
}
- bch2_trans_iter_exit(&trans, &iter);
-
- bch2_trans_exit(&trans);
-
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ bch2_trans_put(trans);
if (ret)
- bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret));
-
+ bch_err_fn(c, ret);
return ret;
}
@@ -1575,7 +1558,7 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
g->dirty_sectors = 0;
g->cached_sectors = 0;
}
- };
+ }
}
static int bch2_gc_write_reflink_key(struct btree_trans *trans,
@@ -1627,7 +1610,7 @@ fsck_err:
static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
size_t idx = 0;
@@ -1636,23 +1619,23 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
if (metadata_only)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
- ret = for_each_btree_key_commit(&trans, iter,
+ ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
- bch2_gc_write_reflink_key(&trans, &iter, k, &idx));
+ bch2_gc_write_reflink_key(trans, &iter, k, &idx));
c->reflink_gc_nr = 0;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
static int bch2_gc_reflink_start(struct bch_fs *c,
bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct reflink_gc *r;
@@ -1661,10 +1644,10 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
if (metadata_only)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
c->reflink_gc_nr = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
@@ -1682,9 +1665,9 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
r->size = k.k->size;
r->refcount = 0;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -1751,7 +1734,7 @@ fsck_err:
static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
@@ -1759,15 +1742,15 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
if (metadata_only)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
- ret = for_each_btree_key_commit(&trans, iter,
+ ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_stripes, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
- bch2_gc_write_stripes_key(&trans, &iter, k));
+ bch2_gc_write_stripes_key(trans, &iter, k));
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -1779,6 +1762,12 @@ static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only)
/**
* bch2_gc - walk _all_ references to buckets, and recompute them:
*
+ * @c: filesystem object
+ * @initial: are we in recovery?
+ * @metadata_only: are we just checking metadata references, or everything?
+ *
+ * Returns: 0 on success, or standard errcode on failure
+ *
* Order matters here:
* - Concurrent GC relies on the fact that we have a total ordering for
* everything that GC walks - see gc_will_visit_node(),
@@ -1947,7 +1936,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
int bch2_gc_gens(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
@@ -1965,7 +1954,7 @@ int bch2_gc_gens(struct bch_fs *c)
trace_and_count(c, gc_gens_start, c);
down_read(&c->gc_lock);
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
for_each_member_device(ca, c, i) {
struct bucket_gens *gens;
@@ -1988,33 +1977,31 @@ int bch2_gc_gens(struct bch_fs *c)
for (i = 0; i < BTREE_ID_NR; i++)
if (btree_type_has_ptrs(i)) {
- struct btree_iter iter;
- struct bkey_s_c k;
-
c->gc_gens_btree = i;
c->gc_gens_pos = POS_MIN;
- ret = for_each_btree_key_commit(&trans, iter, i,
+
+ ret = for_each_btree_key_commit(trans, iter, i,
POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
k,
NULL, NULL,
BTREE_INSERT_NOFAIL,
- gc_btree_gens_key(&trans, &iter, k));
+ gc_btree_gens_key(trans, &iter, k));
if (ret && !bch2_err_matches(ret, EROFS))
- bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
if (ret)
goto err;
}
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN,
BTREE_ITER_PREFETCH,
k,
NULL, NULL,
BTREE_INSERT_NOFAIL,
- bch2_alloc_write_oldest_gen(&trans, &iter, k));
+ bch2_alloc_write_oldest_gen(trans, &iter, k));
if (ret && !bch2_err_matches(ret, EROFS))
- bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
if (ret)
goto err;
@@ -2031,7 +2018,7 @@ err:
ca->oldest_gen = NULL;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
up_read(&c->gc_lock);
mutex_unlock(&c->gc_gens_lock);
return ret;
@@ -2086,7 +2073,7 @@ static int bch2_gc_thread(void *arg)
ret = bch2_gc_gens(c);
#endif
if (ret < 0)
- bch_err(c, "btree gc failed: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
debug_check_no_locks_held();
}
@@ -2116,7 +2103,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
if (IS_ERR(p)) {
- bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p)));
+ bch_err_fn(c, PTR_ERR(p));
return PTR_ERR(p);
}
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 3b65484..a869cf6 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -14,7 +14,7 @@
#include "debug.h"
#include "error.h"
#include "extents.h"
-#include "io.h"
+#include "io_write.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "recovery.h"
@@ -106,8 +106,8 @@ static void btree_bounce_free(struct bch_fs *c, size_t size,
vpfree(p, size);
}
-static void *btree_bounce_alloc_noprof(struct bch_fs *c, size_t size,
- bool *used_mempool)
+static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
+ bool *used_mempool)
{
unsigned flags = memalloc_nofs_save();
void *p;
@@ -115,7 +115,7 @@ static void *btree_bounce_alloc_noprof(struct bch_fs *c, size_t size,
BUG_ON(size > btree_bytes(c));
*used_mempool = false;
- p = vpmalloc_noprof(size, __GFP_NOWARN|GFP_NOWAIT);
+ p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
if (!p) {
*used_mempool = true;
p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
@@ -123,8 +123,6 @@ static void *btree_bounce_alloc_noprof(struct bch_fs *c, size_t size,
memalloc_nofs_restore(flags);
return p;
}
-#define btree_bounce_alloc(_c, _size, _used_mempool) \
- alloc_hooks(btree_bounce_alloc_noprof(_c, _size, _used_mempool))
static void sort_bkey_ptrs(const struct btree *bt,
struct bkey_packed **ptrs, unsigned nr)
@@ -294,7 +292,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
bool filter_whiteouts)
{
struct btree_node *out;
- struct sort_iter sort_iter;
+ struct sort_iter_stack sort_iter;
struct bset_tree *t;
struct bset *start_bset = bset(b, &b->set[start_idx]);
bool used_mempool = false;
@@ -303,13 +301,13 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
bool sorting_entire_node = start_idx == 0 &&
end_idx == b->nsets;
- sort_iter_init(&sort_iter, b);
+ sort_iter_stack_init(&sort_iter, b);
for (t = b->set + start_idx;
t < b->set + end_idx;
t++) {
u64s += le16_to_cpu(bset(b, t)->u64s);
- sort_iter_add(&sort_iter,
+ sort_iter_add(&sort_iter.iter,
btree_bkey_first(b, t),
btree_bkey_last(b, t));
}
@@ -322,7 +320,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
start_time = local_clock();
- u64s = bch2_sort_keys(out->keys.start, &sort_iter, filter_whiteouts);
+ u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter, filter_whiteouts);
out->keys.u64s = cpu_to_le16(u64s);
@@ -338,7 +336,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
start_bset->journal_seq = cpu_to_le64(seq);
if (sorting_entire_node) {
- unsigned u64s = le16_to_cpu(out->keys.u64s);
+ u64s = le16_to_cpu(out->keys.u64s);
BUG_ON(bytes != btree_bytes(c));
@@ -412,8 +410,6 @@ void bch2_btree_sort_into(struct bch_fs *c,
bch2_verify_btree_nr_keys(dst);
}
-#define SORT_CRIT (4096 / sizeof(u64))
-
/*
* We're about to add another bset to the btree node, so if there's currently
* too many bsets - sort some of them together:
@@ -544,6 +540,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
prt_str(out, ": ");
}
+__printf(8, 9)
static int __btree_err(int ret,
struct bch_fs *c,
struct bch_dev *ca,
@@ -624,9 +621,6 @@ __cold
void bch2_btree_node_drop_keys_outside_node(struct btree *b)
{
struct bset_tree *t;
- struct bkey_s_c k;
- struct bkey unpacked;
- struct btree_node_iter iter;
for_each_bset(b, t) {
struct bset *i = bset(b, t);
@@ -662,6 +656,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
bch2_bset_set_no_aux_tree(b, b->set);
bch2_btree_build_aux_trees(b);
+ struct bkey_s_c k;
+ struct bkey unpacked;
+ struct btree_node_iter iter;
for_each_btree_node_key_unpack(b, k, &iter, &unpacked) {
BUG_ON(bpos_lt(k.k->p, b->data->min_key));
BUG_ON(bpos_gt(k.k->p, b->data->max_key));
@@ -910,7 +907,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
unsigned u64s;
- unsigned blacklisted_written, nonblacklisted_written = 0;
unsigned ptr_written = btree_ptr_sectors_written(&b->key);
struct printbuf buf = PRINTBUF;
int ret = 0, retry_read = 0, write = READ;
@@ -920,8 +916,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
b->written = 0;
iter = mempool_alloc(&c->fill_iter, GFP_NOFS);
- sort_iter_init(iter, b);
- iter->size = (btree_blocks(c) + 1) * 2;
+ sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2);
if (bch2_meta_read_fault("btree"))
btree_err(-BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL,
@@ -1045,8 +1040,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sort_iter_add(iter,
vstruct_idx(i, 0),
vstruct_last(i));
-
- nonblacklisted_written = b->written;
}
if (ptr_written) {
@@ -1064,18 +1057,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
true),
-BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL,
"found bset signature after last bset");
-
- /*
- * Blacklisted bsets are those that were written after the most recent
- * (flush) journal write. Since there wasn't a flush, they may not have
- * made it to all devices - which means we shouldn't write new bsets
- * after them, as that could leave a gap and then reads from that device
- * wouldn't find all the bsets in that btree node - which means it's
- * important that we start writing new bsets after the most recent _non_
- * blacklisted bset:
- */
- blacklisted_written = b->written;
- b->written = nonblacklisted_written;
}
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
@@ -1143,9 +1124,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ struct bch_dev *ca2 = bch_dev_bkey_exists(c, ptr->dev);
- if (ca->mi.state != BCH_MEMBER_STATE_rw)
+ if (ca2->mi.state != BCH_MEMBER_STATE_rw)
set_btree_node_need_rewrite(b);
}
@@ -1227,19 +1208,17 @@ start:
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
rb->start_time);
bio_put(&rb->bio);
- printbuf_exit(&buf);
if (saw_error && !btree_node_read_error(b)) {
- struct printbuf buf = PRINTBUF;
-
+ printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
__func__, bch2_btree_ids[b->c.btree_id], b->c.level, buf.buf);
- printbuf_exit(&buf);
bch2_btree_node_rewrite_async(c, b);
}
+ printbuf_exit(&buf);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
}
@@ -1649,8 +1628,7 @@ err:
int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
const struct bkey_i *k, unsigned level)
{
- return bch2_trans_run(c, __bch2_btree_root_read(&trans, id, k, level));
-
+ return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
}
void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
@@ -1712,15 +1690,13 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
- struct btree_trans trans;
-
- bch2_trans_init(&trans, c, 0, 0);
+ struct btree_trans *trans = bch2_trans_get(c);
- btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
__btree_node_write_done(c, b);
six_unlock_read(&b->c.lock);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
}
static void btree_node_write_work(struct work_struct *work)
@@ -1749,7 +1725,7 @@ static void btree_node_write_work(struct work_struct *work)
}
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_btree_node_update_key_get_iter(&trans, b, &wbio->key,
+ bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_reclaim|
BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_NOFAIL|
@@ -1854,7 +1830,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
struct bset *i;
struct btree_node *bn = NULL;
struct btree_node_entry *bne = NULL;
- struct sort_iter sort_iter;
+ struct sort_iter_stack sort_iter;
struct nonce nonce;
unsigned bytes_to_write, sectors_to_write, bytes, u64s;
u64 seq = 0;
@@ -1927,7 +1903,7 @@ do_write:
bch2_sort_whiteouts(c, b);
- sort_iter_init(&sort_iter, b);
+ sort_iter_stack_init(&sort_iter, b);
bytes = !b->written
? sizeof(struct btree_node)
@@ -1942,7 +1918,7 @@ do_write:
continue;
bytes += le16_to_cpu(i->u64s) * sizeof(u64);
- sort_iter_add(&sort_iter,
+ sort_iter_add(&sort_iter.iter,
btree_bkey_first(b, t),
btree_bkey_last(b, t));
seq = max(seq, le64_to_cpu(i->journal_seq));
@@ -1971,14 +1947,14 @@ do_write:
i->journal_seq = cpu_to_le64(seq);
i->u64s = 0;
- sort_iter_add(&sort_iter,
+ sort_iter_add(&sort_iter.iter,
unwritten_whiteouts_start(c, b),
unwritten_whiteouts_end(c, b));
SET_BSET_SEPARATE_WHITEOUTS(i, false);
b->whiteout_u64s = 0;
- u64s = bch2_sort_keys(i->start, &sort_iter, false);
+ u64s = bch2_sort_keys(i->start, &sort_iter.iter, false);
le16_add_cpu(&i->u64s, u64s);
BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index cd99bbb..7e03dd7 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -7,7 +7,7 @@
#include "btree_locking.h"
#include "checksum.h"
#include "extents.h"
-#include "io_types.h"
+#include "io_write_types.h"
struct bch_fs;
struct btree_write;
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 5216d33..4cee5e6 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -488,7 +488,6 @@ fixup_done:
if (!bch2_btree_node_iter_end(node_iter) &&
iter_current_key_modified &&
b->c.level) {
- struct bset_tree *t;
struct bkey_packed *k, *k2, *p;
k = bch2_btree_node_iter_peek_all(node_iter, b);
@@ -689,7 +688,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
if (t != BTREE_NODE_UNLOCKED) {
btree_node_unlock(trans, path, b->c.level);
six_lock_increment(&b->c.lock, (enum six_lock_type) t);
- mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
+ mark_btree_node_locked(trans, path, b->c.level, t);
}
bch2_btree_path_level_init(trans, path, b);
@@ -764,7 +763,8 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
path->l[i].b = NULL;
- mark_btree_node_locked(trans, path, path->level, lock_type);
+ mark_btree_node_locked(trans, path, path->level,
+ (enum btree_node_locked_type) lock_type);
bch2_btree_path_level_init(trans, path, b);
return 0;
}
@@ -936,7 +936,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (btree_node_read_locked(path, level + 1))
btree_node_unlock(trans, path, level + 1);
- mark_btree_node_locked(trans, path, level, lock_type);
+ mark_btree_node_locked(trans, path, level,
+ (enum btree_node_locked_type) lock_type);
path->level = level;
bch2_btree_path_level_init(trans, path, b);
@@ -1341,14 +1342,14 @@ static void bch2_path_put_nokeep(struct btree_trans *trans, struct btree_path *p
__bch2_path_free(trans, path);
}
-void bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
+void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
{
panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
trans->restart_count, restart_count,
(void *) trans->last_begin_ip);
}
-void bch2_trans_in_restart_error(struct btree_trans *trans)
+void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
{
panic("in transaction restart: %s, last restarted by %pS\n",
bch2_err_str(trans->restarted),
@@ -1493,7 +1494,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
static noinline void btree_path_overflow(struct btree_trans *trans)
{
bch2_dump_trans_paths_updates(trans);
- panic("trans path oveflow\n");
+ panic("trans path overflow\n");
}
static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
@@ -2046,8 +2047,12 @@ out:
}
/**
- * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
- * current position
+ * bch2_btree_iter_peek_upto() - returns first key greater than or equal to
+ * iterator's current position
+ * @iter: iterator to peek from
+ * @end: search limit: returns keys less than or equal to @end
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
{
@@ -2184,10 +2189,13 @@ end:
}
/**
- * bch2_btree_iter_peek_all_levels: returns the first key greater than or equal
- * to iterator's current position, returning keys from every level of the btree.
- * For keys at different levels of the btree that compare equal, the key from
- * the lower level (leaf) is returned first.
+ * bch2_btree_iter_peek_all_levels() - returns the first key greater than or
+ * equal to iterator's current position, returning keys from every level of the
+ * btree. For keys at different levels of the btree that compare equal, the key
+ * from the lower level (leaf) is returned first.
+ * @iter: iterator to peek from
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
{
@@ -2278,8 +2286,11 @@ out_no_locked:
}
/**
- * bch2_btree_iter_next: returns first key greater than iterator's current
+ * bch2_btree_iter_next() - returns first key greater than iterator's current
* position
+ * @iter: iterator to peek from
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
{
@@ -2290,8 +2301,11 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
}
/**
- * bch2_btree_iter_peek_prev: returns first key less than or equal to
+ * bch2_btree_iter_peek_prev() - returns first key less than or equal to
* iterator's current position
+ * @iter: iterator to peek from
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
@@ -2414,8 +2428,11 @@ out_no_locked:
}
/**
- * bch2_btree_iter_prev: returns first key less than iterator's current
+ * bch2_btree_iter_prev() - returns first key less than iterator's current
* position
+ * @iter: iterator to peek from
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
{
@@ -2722,7 +2739,7 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
void bch2_trans_iter_init_outlined(struct btree_trans *trans,
struct btree_iter *iter,
- unsigned btree_id, struct bpos pos,
+ enum btree_id btree_id, struct bpos pos,
unsigned flags)
{
bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
@@ -2830,6 +2847,8 @@ static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
* bch2_trans_begin() - reset a transaction after a interrupted attempt
* @trans: transaction to reset
*
+ * Returns: current restart counter, to be used with trans_was_restarted()
+ *
* While iterating over nodes or updating nodes a attempt to lock a btree node
* may return BCH_ERR_transaction_restart when the trylock fails. When this
* occurs bch2_trans_begin() should be called and the transaction retried.
@@ -2887,28 +2906,23 @@ u32 bch2_trans_begin(struct btree_trans *trans)
return trans->restart_count;
}
-static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
+static struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
{
- size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX;
- size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
- void *p = NULL;
-
- BUG_ON(trans->used_mempool);
+ struct btree_trans *trans;
-#ifdef __KERNEL__
- p = this_cpu_xchg(c->btree_paths_bufs->path, NULL);
-#endif
- if (!p) {
- p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
- /*
- * paths need to be zeroed, bch2_check_for_deadlock looks at
- * paths in other threads
- */
- memset(p, 0, paths_bytes);
+ if (IS_ENABLED(__KERNEL__)) {
+ trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
+ if (trans)
+ return trans;
}
- trans->paths = p; p += paths_bytes;
- trans->updates = p; p += updates_bytes;
+ trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
+ /*
+ * paths need to be zeroed, bch2_check_for_deadlock looks at
+ * paths in other threads
+ */
+ memset(&trans->paths, 0, sizeof(trans->paths));
+ return trans;
}
const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
@@ -2928,13 +2942,16 @@ unsigned bch2_trans_get_fn_idx(const char *fn)
return i;
}
-void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_idx)
+struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
__acquires(&c->btree_trans_barrier)
{
+ struct btree_trans *trans;
struct btree_transaction_stats *s;
bch2_assert_btree_nodes_not_locked();
+ trans = bch2_trans_alloc(c);
+
memset(trans, 0, sizeof(*trans));
trans->c = c;
trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
@@ -2946,8 +2963,6 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
closure_init_stack(&trans->ref);
- bch2_trans_alloc_paths(trans, c);
-
s = btree_trans_stats(trans);
if (s && s->max_mem) {
unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
@@ -2993,6 +3008,8 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
list_add_done:
seqmutex_unlock(&c->btree_trans_lock);
}
+
+ return trans;
}
static void check_btree_paths_leaked(struct btree_trans *trans)
@@ -3017,7 +3034,7 @@ leaked:
#endif
}
-void bch2_trans_exit(struct btree_trans *trans)
+void bch2_trans_put(struct btree_trans *trans)
__releases(&c->btree_trans_barrier)
{
struct btree_insert_entry *i;
@@ -3063,18 +3080,11 @@ void bch2_trans_exit(struct btree_trans *trans)
else
kfree(trans->mem);
-#ifdef __KERNEL__
- /*
- * Userspace doesn't have a real percpu implementation:
- */
- trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths);
-#endif
-
- if (trans->paths)
- mempool_free(trans->paths, &c->btree_paths_pool);
-
- trans->mem = (void *) 0x1;
- trans->paths = (void *) 0x1;
+ /* Userspace doesn't have a real percpu implementation: */
+ if (IS_ENABLED(__KERNEL__))
+ trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
+ if (trans)
+ mempool_free(trans, &c->btree_trans_pool);
}
static void __maybe_unused
@@ -3152,6 +3162,17 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
struct btree_transaction_stats *s;
+ struct btree_trans *trans;
+ int cpu;
+
+ trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
+ if (trans)
+ panic("%s leaked btree_trans\n", trans->fn);
+
+ if (c->btree_trans_bufs)
+ for_each_possible_cpu(cpu)
+ kfree(per_cpu_ptr(c->btree_trans_bufs, cpu)->trans);
+ free_percpu(c->btree_trans_bufs);
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
@@ -3163,13 +3184,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
if (c->btree_trans_barrier_initialized)
cleanup_srcu_struct(&c->btree_trans_barrier);
mempool_exit(&c->btree_trans_mem_pool);
- mempool_exit(&c->btree_paths_pool);
+ mempool_exit(&c->btree_trans_pool);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
struct btree_transaction_stats *s;
- unsigned nr = BTREE_ITER_MAX;
int ret;
for (s = c->btree_transaction_stats;
@@ -3182,9 +3202,12 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
INIT_LIST_HEAD(&c->btree_trans_list);
seqmutex_init(&c->btree_trans_lock);
- ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
- sizeof(struct btree_path) * nr +
- sizeof(struct btree_insert_entry) * nr) ?:
+ c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
+ if (!c->btree_trans_bufs)
+ return -ENOMEM;
+
+ ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1,
+ sizeof(struct btree_trans)) ?:
mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
BTREE_TRANS_MEM_MAX) ?:
init_srcu_struct(&c->btree_trans_barrier);
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 8876f2b..fbe2734 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -276,12 +276,14 @@ int bch2_trans_relock_notrace(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
bool bch2_trans_locked(struct btree_trans *);
-static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count)
+static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
{
- return restart_count != trans->restart_count;
+ return restart_count != trans->restart_count
+ ? -BCH_ERR_transaction_restart_nested
+ : 0;
}
-void bch2_trans_restart_error(struct btree_trans *, u32);
+void __noreturn bch2_trans_restart_error(struct btree_trans *, u32);
static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
u32 restart_count)
@@ -290,7 +292,7 @@ static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
bch2_trans_restart_error(trans, restart_count);
}
-void bch2_trans_in_restart_error(struct btree_trans *);
+void __noreturn bch2_trans_in_restart_error(struct btree_trans *);
static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans)
{
@@ -463,7 +465,7 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
}
void bch2_trans_iter_init_outlined(struct btree_trans *, struct btree_iter *,
- unsigned, struct bpos, unsigned);
+ enum btree_id, struct bpos, unsigned);
static inline void bch2_trans_iter_init(struct btree_trans *trans,
struct btree_iter *iter,
@@ -672,17 +674,17 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
#define lockrestart_do(_trans, _do) \
({ \
u32 _restart_count; \
- int _ret; \
+ int _ret2; \
\
do { \
_restart_count = bch2_trans_begin(_trans); \
- _ret = (_do); \
- } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \
+ _ret2 = (_do); \
+ } while (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)); \
\
- if (!_ret) \
+ if (!_ret2) \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
\
- _ret; \
+ _ret2; \
})
/*
@@ -697,26 +699,23 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
#define nested_lockrestart_do(_trans, _do) \
({ \
u32 _restart_count, _orig_restart_count; \
- int _ret; \
+ int _ret2; \
\
_restart_count = _orig_restart_count = (_trans)->restart_count; \
\
- while (bch2_err_matches(_ret = (_do), BCH_ERR_transaction_restart))\
+ while (bch2_err_matches(_ret2 = (_do), BCH_ERR_transaction_restart))\
_restart_count = bch2_trans_begin(_trans); \
\
- if (!_ret) \
+ if (!_ret2) \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
\
- if (!_ret && trans_was_restarted(_trans, _orig_restart_count)) \
- _ret = -BCH_ERR_transaction_restart_nested; \
- \
- _ret; \
+ _ret2 ?: trans_was_restarted(_trans, _restart_count); \
})
#define for_each_btree_key2(_trans, _iter, _btree_id, \
_start, _flags, _k, _do) \
({ \
- int _ret = 0; \
+ int _ret3 = 0; \
\
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
@@ -724,15 +723,15 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
while (1) { \
u32 _restart_count = bch2_trans_begin(_trans); \
\
- _ret = 0; \
+ _ret3 = 0; \
(_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \
if (!(_k).k) \
break; \
\
- _ret = bkey_err(_k) ?: (_do); \
- if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+ _ret3 = bkey_err(_k) ?: (_do); \
+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
continue; \
- if (_ret) \
+ if (_ret3) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_advance(&(_iter))) \
@@ -740,13 +739,13 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
} \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
- _ret; \
+ _ret3; \
})
#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _do) \
({ \
- int _ret = 0; \
+ int _ret3 = 0; \
\
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
@@ -754,15 +753,15 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
while (1) { \
u32 _restart_count = bch2_trans_begin(_trans); \
\
- _ret = 0; \
+ _ret3 = 0; \
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, (_flags));\
if (!(_k).k) \
break; \
\
- _ret = bkey_err(_k) ?: (_do); \
- if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+ _ret3 = bkey_err(_k) ?: (_do); \
+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
continue; \
- if (_ret) \
+ if (_ret3) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_advance(&(_iter))) \
@@ -770,13 +769,13 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
} \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
- _ret; \
+ _ret3; \
})
#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
_start, _flags, _k, _do) \
({ \
- int _ret = 0; \
+ int _ret3 = 0; \
\
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
@@ -785,14 +784,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
u32 _restart_count = bch2_trans_begin(_trans); \
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
if (!(_k).k) { \
- _ret = 0; \
+ _ret3 = 0; \
break; \
} \
\
- _ret = bkey_err(_k) ?: (_do); \
- if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+ _ret3 = bkey_err(_k) ?: (_do); \
+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
continue; \
- if (_ret) \
+ if (_ret3) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_rewind(&(_iter))) \
@@ -800,7 +799,7 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
} \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
- _ret; \
+ _ret3; \
})
#define for_each_btree_key_commit(_trans, _iter, _btree_id, \
@@ -916,21 +915,21 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
-void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned);
-void bch2_trans_exit(struct btree_trans *);
+
+struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
+void bch2_trans_put(struct btree_trans *);
extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
unsigned bch2_trans_get_fn_idx(const char *);
-#define bch2_trans_init(_trans, _c, _nr_iters, _mem) \
-do { \
+#define bch2_trans_get(_c) \
+({ \
static unsigned trans_fn_idx; \
\
if (unlikely(!trans_fn_idx)) \
trans_fn_idx = bch2_trans_get_fn_idx(__func__); \
- \
- __bch2_trans_init(_trans, _c, trans_fn_idx); \
-} while (0)
+ __bch2_trans_get(_c, trans_fn_idx); \
+})
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index f7c001d..1407f69 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -243,8 +243,6 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
}
if (ck) {
- int ret;
-
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_);
if (unlikely(ret)) {
bkey_cached_move_to_freelist(bc, ck);
@@ -253,7 +251,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
path->l[0].b = (void *) ck;
path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
- mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
ret = bch2_btree_node_lock_write(trans, path, &ck->c);
if (unlikely(ret)) {
@@ -331,7 +329,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
}
- mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
}
ck->c.level = 0;
@@ -479,7 +477,7 @@ retry:
if (!ck)
goto retry;
- mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
path->locks_want = 1;
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
@@ -497,7 +495,8 @@ retry:
goto retry;
}
- mark_btree_node_locked(trans, path, 0, lock_want);
+ mark_btree_node_locked(trans, path, 0,
+ (enum btree_node_locked_type) lock_want);
}
path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
@@ -579,7 +578,8 @@ retry:
goto retry;
}
- mark_btree_node_locked(trans, path, 0, lock_want);
+ mark_btree_node_locked(trans, path, 0,
+ (enum btree_node_locked_type) lock_want);
}
path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
@@ -705,13 +705,11 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
- btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
+ btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read);
key = ck->key;
if (ck->journal.seq != seq ||
@@ -728,13 +726,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
}
six_unlock_read(&ck->c.lock);
- ret = commit_do(&trans, NULL, NULL, 0,
- btree_key_cache_flush_pos(&trans, key, seq,
+ ret = commit_do(trans, NULL, NULL, 0,
+ btree_key_cache_flush_pos(trans, key, seq,
BTREE_INSERT_JOURNAL_RECLAIM, false));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -1065,7 +1063,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
{
- prt_printf(out, "nr_freed:\t%zu", atomic_long_read(&c->nr_freed));
+ prt_printf(out, "nr_freed:\t%lu", atomic_long_read(&c->nr_freed));
prt_newline(out);
prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys));
prt_newline(out);
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index 22e2cd3..6231e9f 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -91,7 +91,7 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
static inline void mark_btree_node_locked(struct btree_trans *trans,
struct btree_path *path,
unsigned level,
- enum six_lock_type type)
+ enum btree_node_locked_type type)
{
mark_btree_node_locked_noreset(path, level, (enum btree_node_locked_type) type);
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index eafb038..04c1f46 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -163,13 +163,11 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
unsigned long old, new, v;
unsigned idx = w - b->writes;
- bch2_trans_init(&trans, c, 0, 0);
-
- btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
v = READ_ONCE(b->flags);
do {
@@ -188,7 +186,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return 0;
}
@@ -214,7 +212,11 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
}
/**
- * btree_insert_key - insert a key one key into a leaf node
+ * bch2_btree_insert_key_leaf() - insert a key one key into a leaf node
+ * @trans: btree transaction object
+ * @path: path pointing to @insert's pos
+ * @insert: key to insert
+ * @journal_seq: sequence number of journal reservation
*/
inline void bch2_btree_insert_key_leaf(struct btree_trans *trans,
struct btree_path *path,
@@ -555,7 +557,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct btree_write_buffered_key *wb;
struct btree_trans_commit_hook *h;
unsigned u64s = 0;
- bool marking = false;
int ret;
if (race_fault()) {
@@ -584,9 +585,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
*stopped_at = i;
return ret;
}
-
- if (btree_node_type_needs_gc(i->bkey_type))
- marking = true;
}
if (trans->nr_wb_updates &&
@@ -778,7 +776,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans
bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p);
}
-#ifdef CONFIG_BCACHEFS_DEBUG
static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry *i,
struct printbuf *err)
@@ -804,7 +801,6 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, un
return -EINVAL;
}
-#endif
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
@@ -1029,7 +1025,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (ret)
goto out_reset;
-#ifdef CONFIG_BCACHEFS_DEBUG
trans_for_each_update(trans, i) {
struct printbuf buf = PRINTBUF;
enum bkey_invalid_flags invalid_flags = 0;
@@ -1046,7 +1041,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (ret)
return ret;
}
-#endif
if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
ret = do_bch2_trans_commit_to_journal_replay(trans);
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 71ad389..67ecb5e 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -194,34 +194,34 @@ struct btree_node_iter {
/*
* Iterate over all possible positions, synthesizing deleted keys for holes:
*/
-static const u16 BTREE_ITER_SLOTS = 1 << 0;
-static const u16 BTREE_ITER_ALL_LEVELS = 1 << 1;
+static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
+static const __maybe_unused u16 BTREE_ITER_ALL_LEVELS = 1 << 1;
/*
* Indicates that intent locks should be taken on leaf nodes, because we expect
* to be doing updates:
*/
-static const u16 BTREE_ITER_INTENT = 1 << 2;
+static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 2;
/*
* Causes the btree iterator code to prefetch additional btree nodes from disk:
*/
-static const u16 BTREE_ITER_PREFETCH = 1 << 3;
+static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 3;
/*
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
* @pos or the first key strictly greater than @pos
*/
-static const u16 BTREE_ITER_IS_EXTENTS = 1 << 4;
-static const u16 BTREE_ITER_NOT_EXTENTS = 1 << 5;
-static const u16 BTREE_ITER_CACHED = 1 << 6;
-static const u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7;
-static const u16 BTREE_ITER_WITH_UPDATES = 1 << 8;
-static const u16 BTREE_ITER_WITH_JOURNAL = 1 << 9;
-static const u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
-static const u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11;
-static const u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12;
-static const u16 BTREE_ITER_NOPRESERVE = 1 << 13;
-static const u16 BTREE_ITER_CACHED_NOFILL = 1 << 14;
-static const u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15;
-#define __BTREE_ITER_FLAGS_END 16
+static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 4;
+static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 5;
+static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 6;
+static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7;
+static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 8;
+static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 9;
+static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
+static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11;
+static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12;
+static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 13;
+static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 14;
+static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15;
+#define __BTREE_ITER_FLAGS_END 16
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
@@ -459,8 +459,8 @@ struct btree_trans {
void *mem;
u8 sorted[BTREE_ITER_MAX + 8];
- struct btree_path *paths;
- struct btree_insert_entry *updates;
+ struct btree_path paths[BTREE_ITER_MAX];
+ struct btree_insert_entry updates[BTREE_ITER_MAX];
struct btree_write_buffered_key *wb_updates;
/* update path: */
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index 880ce74..324767c 100644
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -124,7 +124,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
struct bkey_s_c old_k, new_k;
snapshot_id_list s;
struct bkey_i *update;
- int ret;
+ int ret = 0;
if (!bch2_snapshot_has_children(c, old_pos.snapshot))
return 0;
@@ -466,11 +466,49 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
return 0;
}
+static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct btree_path *path)
+{
+ if (!iter->key_cache_path ||
+ !iter->key_cache_path->should_be_locked ||
+ !bpos_eq(iter->key_cache_path->pos, iter->pos)) {
+ struct bkey_cached *ck;
+ int ret;
+
+ if (!iter->key_cache_path)
+ iter->key_cache_path =
+ bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_CACHED, _THIS_IP_);
+
+ iter->key_cache_path =
+ bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
+ iter->flags & BTREE_ITER_INTENT,
+ _THIS_IP_);
+
+ ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
+ BTREE_ITER_CACHED);
+ if (unlikely(ret))
+ return ret;
+
+ ck = (void *) iter->key_cache_path->l[0].b;
+
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
+ }
+
+ btree_path_set_should_be_locked(iter->key_cache_path);
+ }
+
+ return 0;
+}
+
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_update_flags flags)
{
struct btree_path *path = iter->update_path ?: iter->path;
- struct bkey_cached *ck;
int ret;
if (iter->flags & BTREE_ITER_IS_EXTENTS)
@@ -494,34 +532,9 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
!path->cached &&
!path->level &&
btree_id_cached(trans->c, path->btree_id)) {
- if (!iter->key_cache_path ||
- !iter->key_cache_path->should_be_locked ||
- !bpos_eq(iter->key_cache_path->pos, k->k.p)) {
- if (!iter->key_cache_path)
- iter->key_cache_path =
- bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
- BTREE_ITER_INTENT|
- BTREE_ITER_CACHED, _THIS_IP_);
-
- iter->key_cache_path =
- bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
- iter->flags & BTREE_ITER_INTENT,
- _THIS_IP_);
-
- ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
- BTREE_ITER_CACHED);
- if (unlikely(ret))
- return ret;
-
- ck = (void *) iter->key_cache_path->l[0].b;
-
- if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
- trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
- return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
- }
-
- btree_path_set_should_be_locked(iter->key_cache_path);
- }
+ ret = bch2_trans_update_get_key_cache(trans, iter, path);
+ if (ret)
+ return ret;
path = iter->key_cache_path;
}
@@ -640,6 +653,7 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans,
int ret;
bch2_trans_iter_init(trans, &iter, btree, k->k.p,
+ BTREE_ITER_CACHED|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter) ?:
@@ -648,8 +662,8 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans,
return ret;
}
-int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id,
- struct bkey_i *k, enum btree_update_flags flags)
+int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
+ struct bkey_i *k, enum btree_update_flags flags)
{
struct btree_iter iter;
int ret;
@@ -667,16 +681,18 @@ int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id,
* bch2_btree_insert - insert keys into the extent btree
* @c: pointer to struct bch_fs
* @id: btree to insert into
- * @insert_keys: list of keys to insert
- * @hook: insert callback
+ * @k: key to insert
+ * @disk_res: must be non-NULL whenever inserting or potentially
+ * splitting data extents
+ * @flags: transaction commit flags
+ *
+ * Returns: 0 on success, error code on failure
*/
-int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
- struct bkey_i *k,
- struct disk_reservation *disk_res,
- u64 *journal_seq, int flags)
+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
+ struct disk_reservation *disk_res, int flags)
{
- return bch2_trans_do(c, disk_res, journal_seq, flags,
- __bch2_btree_insert(&trans, id, k, 0));
+ return bch2_trans_do(c, disk_res, NULL, flags,
+ bch2_btree_insert_trans(trans, id, k, 0));
}
int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
@@ -714,6 +730,23 @@ int bch2_btree_delete_at_buffered(struct btree_trans *trans,
return bch2_trans_update_buffered(trans, btree, k);
}
+int bch2_btree_delete(struct btree_trans *trans,
+ enum btree_id btree, struct bpos pos,
+ unsigned update_flags)
+{
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, btree, pos,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_btree_delete_at(trans, &iter, update_flags);
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end,
unsigned update_flags,
@@ -777,9 +810,7 @@ err:
}
bch2_trans_iter_exit(trans, &iter);
- if (!ret && trans_was_restarted(trans, restart_count))
- ret = -BCH_ERR_transaction_restart_nested;
- return ret;
+ return ret ?: trans_was_restarted(trans, restart_count);
}
/*
@@ -793,7 +824,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
u64 *journal_seq)
{
int ret = bch2_trans_run(c,
- bch2_btree_delete_range_trans(&trans, id, start, end,
+ bch2_btree_delete_range_trans(trans, id, start, end,
update_flags, journal_seq));
if (ret == -BCH_ERR_transaction_restart_nested)
ret = 0;
@@ -818,6 +849,7 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
return bch2_trans_update_buffered(trans, btree, k);
}
+__printf(2, 0)
static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
{
struct printbuf buf = PRINTBUF;
@@ -854,6 +886,7 @@ err:
return ret;
}
+__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
va_list args)
@@ -865,12 +898,13 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
} else {
ret = bch2_trans_do(c, NULL, NULL,
BTREE_INSERT_LAZY_RW|commit_flags,
- __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
+ __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
}
return ret;
}
+__printf(2, 3)
int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
{
va_list args;
@@ -886,6 +920,7 @@ int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
* Use for logging messages during recovery to enable reserved space and avoid
* blocking.
*/
+__printf(2, 3)
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
{
va_list args;
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 901c42b..9816d22 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -4,7 +4,6 @@
#include "btree_iter.h"
#include "journal.h"
-#include "journal.h"
struct bch_fs;
struct btree;
@@ -58,14 +57,15 @@ int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
unsigned, unsigned);
int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos);
+int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
struct bkey_i *, enum btree_update_flags);
-int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *,
+int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
enum btree_update_flags);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
- struct disk_reservation *, u64 *, int flags);
+ struct disk_reservation *, int flags);
int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
struct bpos, struct bpos, unsigned, u64 *);
@@ -114,8 +114,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *, unsigned);
-int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
-int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
+__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
+__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
/**
* bch2_trans_commit - insert keys at given iterator positions
@@ -145,30 +145,17 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_flags)))
-#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
-({ \
- struct btree_trans trans; \
- int _ret; \
- \
- bch2_trans_init(&trans, (_c), 0, 0); \
- _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
- bch2_trans_exit(&trans); \
- \
- _ret; \
-})
-
#define bch2_trans_run(_c, _do) \
({ \
- struct btree_trans trans; \
- int _ret; \
- \
- bch2_trans_init(&trans, (_c), 0, 0); \
- _ret = (_do); \
- bch2_trans_exit(&trans); \
- \
+ struct btree_trans *trans = bch2_trans_get(_c); \
+ int _ret = (_do); \
+ bch2_trans_put(trans); \
_ret; \
})
+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
+ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
+
#define trans_for_each_update(_trans, _i) \
for ((_i) = (_trans)->updates; \
(_i) < (_trans)->updates + (_trans)->nr_updates; \
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index c741150..7dbf6b6 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -143,10 +143,15 @@ static size_t btree_node_u64s_with_format(struct btree *b,
}
/**
- * btree_node_format_fits - check if we could rewrite node with a new format
+ * bch2_btree_node_format_fits - check if we could rewrite node with a new format
*
- * This assumes all keys can pack with the new format -- it just checks if
- * the re-packed keys would fit inside the node itself.
+ * @c: filesystem handle
+ * @b: btree node to rewrite
+ * @new_f: bkey format to translate keys to
+ *
+ * Returns: true if all re-packed keys will be able to fit in a new node.
+ *
+ * Assumes all keys will successfully pack with the new format.
*/
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
struct bkey_format *new_f)
@@ -244,7 +249,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct write_point *wp;
struct btree *b;
BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
- struct open_buckets ob = { .nr = 0 };
+ struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
@@ -257,7 +262,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
- ob = a->ob;
+ obs = a->ob;
bkey_copy(&tmp.k, &a->k);
mutex_unlock(&c->btree_reserve_cache_lock);
goto mem_alloc;
@@ -292,7 +297,7 @@ retry:
bkey_btree_ptr_v2_init(&tmp.k);
bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false);
- bch2_open_bucket_get(c, wp, &ob);
+ bch2_open_bucket_get(c, wp, &obs);
bch2_alloc_sectors_done(c, wp);
mem_alloc:
b = bch2_btree_node_mem_alloc(trans, interior_node);
@@ -304,7 +309,7 @@ mem_alloc:
BUG_ON(b->ob.nr);
bkey_copy(&b->key, &tmp.k);
- b->ob = ob;
+ b->ob = obs;
return b;
}
@@ -592,12 +597,11 @@ static void btree_update_nodes_written(struct btree_update *as)
{
struct bch_fs *c = as->c;
struct btree *b;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
u64 journal_seq = 0;
unsigned i;
int ret;
- bch2_trans_init(&trans, c, 0, 512);
/*
* If we're already in an error state, it might be because a btree node
* was never written, and we might be trying to free that same btree
@@ -618,7 +622,7 @@ static void btree_update_nodes_written(struct btree_update *as)
b = as->old_nodes[i];
- btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
seq = b->data ? b->data->keys.seq : 0;
six_unlock_read(&b->c.lock);
@@ -640,13 +644,13 @@ static void btree_update_nodes_written(struct btree_update *as)
* journal reclaim does btree updates when flushing bkey_cached entries,
* which may require allocations as well.
*/
- ret = commit_do(&trans, &as->disk_res, &journal_seq,
+ ret = commit_do(trans, &as->disk_res, &journal_seq,
BCH_WATERMARK_reclaim|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM,
- btree_update_nodes_written_trans(&trans, as));
- bch2_trans_unlock(&trans);
+ btree_update_nodes_written_trans(trans, as));
+ bch2_trans_unlock(trans);
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s(): error %s", __func__, bch2_err_str(ret));
@@ -655,7 +659,7 @@ err:
struct btree_path *path;
b = as->b;
- path = get_unlocked_mut_path(&trans, as->btree_id, b->c.level, b->key.k.p);
+ path = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p);
/*
* @b is the node we did the final insert into:
*
@@ -678,13 +682,13 @@ err:
* we may rarely end up with a locked path besides the one we
* have here:
*/
- bch2_trans_unlock(&trans);
- btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
- mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
+ bch2_trans_unlock(trans);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED);
path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
path->l[b->c.level].b = b;
- bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
+ bch2_btree_node_lock_write_nofail(trans, path, &b->c);
mutex_lock(&c->btree_interior_update_lock);
@@ -697,15 +701,15 @@ err:
* btree_interior_update_lock:
*/
if (as->b == b) {
- struct bset *i = btree_bset_last(b);
-
BUG_ON(!b->c.level);
BUG_ON(!btree_node_dirty(b));
if (!ret) {
- i->journal_seq = cpu_to_le64(
+ struct bset *last = btree_bset_last(b);
+
+ last->journal_seq = cpu_to_le64(
max(journal_seq,
- le64_to_cpu(i->journal_seq)));
+ le64_to_cpu(last->journal_seq)));
bch2_btree_add_journal_pin(c, b, journal_seq);
} else {
@@ -724,8 +728,8 @@ err:
six_unlock_write(&b->c.lock);
btree_node_write_if_need(c, b, SIX_LOCK_intent);
- btree_node_unlock(&trans, path, b->c.level);
- bch2_path_put(&trans, path, true);
+ btree_node_unlock(trans, path, b->c.level);
+ bch2_path_put(trans, path, true);
}
bch2_journal_pin_drop(&c->journal, &as->journal);
@@ -745,7 +749,7 @@ err:
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
- btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}
@@ -753,8 +757,8 @@ err:
for (i = 0; i < as->nr_open_buckets; i++)
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
- bch2_btree_update_free(as, &trans);
- bch2_trans_exit(&trans);
+ bch2_btree_update_free(as, trans);
+ bch2_trans_put(trans);
}
static void btree_interior_update_work(struct work_struct *work)
@@ -1216,18 +1220,6 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
bch2_recalc_btree_reserve(c);
}
-/**
- * bch_btree_set_root - update the root in memory and on disk
- *
- * To ensure forward progress, the current task must not be holding any
- * btree node write locks. However, you must hold an intent lock on the
- * old root.
- *
- * Note: This allocates a journal entry but doesn't add any keys to
- * it. All the btree roots are part of every journal write, so there
- * is nothing new to be done. This just guarantees that there is a
- * journal write.
- */
static void bch2_btree_set_root(struct btree_update *as,
struct btree_trans *trans,
struct btree_path *path,
@@ -1341,12 +1333,12 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
;
while (!bch2_keylist_empty(keys)) {
- struct bkey_i *k = bch2_keylist_front(keys);
+ insert = bch2_keylist_front(keys);
- if (bpos_gt(k->k.p, b->key.k.p))
+ if (bpos_gt(insert->k.p, b->key.k.p))
break;
- bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, k);
+ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert);
bch2_keylist_pop_front(keys);
}
}
@@ -1513,12 +1505,12 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
- mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path1, n1);
path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p);
six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
- mark_btree_node_locked(trans, path2, n2->c.level, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path2, n2->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path2, n2);
/*
@@ -1539,7 +1531,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
path2->locks_want++;
BUG_ON(btree_node_locked(path2, n3->c.level));
six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
- mark_btree_node_locked(trans, path2, n3->c.level, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path2, n3->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path2, n3);
n3->sib_u64s[0] = U16_MAX;
@@ -1563,7 +1555,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
- mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path1, n1);
if (parent)
@@ -1661,12 +1653,16 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
}
/**
- * bch_btree_insert_node - insert bkeys into a given btree node
+ * bch2_btree_insert_node - insert bkeys into a given btree node
*
- * @iter: btree iterator
+ * @as: btree_update object
+ * @trans: btree_trans object
+ * @path: path that points to current node
+ * @b: node to insert keys into
* @keys: list of keys to insert
- * @hook: insert callback
- * @persistent: if not null, @persistent will wait on journal write
+ * @flags: transaction commit flags
+ *
+ * Returns: 0 on success, typically transaction restart error on failure
*
* Inserts as many keys as it can into a given btree node, splitting it if full.
* If a split occurred, this function will return early. This can only happen
@@ -1890,7 +1886,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
- mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, new_path, n);
bkey_init(&delete.k);
@@ -1934,9 +1930,6 @@ err_free_update:
goto out;
}
-/**
- * bch_btree_node_rewrite - Rewrite/move a btree node
- */
int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
struct btree *b,
@@ -1967,7 +1960,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
- mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, new_path, n);
trace_and_count(c, btree_node_rewrite, c, b);
@@ -2055,9 +2048,9 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
int ret;
ret = bch2_trans_do(c, NULL, NULL, 0,
- async_btree_node_rewrite_trans(&trans, a));
+ async_btree_node_rewrite_trans(trans, a));
if (ret)
- bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
+ bch_err_fn(c, ret);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
@@ -2096,8 +2089,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
ret = bch2_fs_read_write_early(c);
if (ret) {
- bch_err(c, "%s: error going read-write: %s",
- __func__, bch2_err_str(ret));
+ bch_err_msg(c, ret, "going read-write");
kfree(a);
return;
}
@@ -2372,7 +2364,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
{
- bch2_trans_run(c, __bch2_btree_root_alloc(&trans, id));
+ bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 6d2d43b..4e6241d 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -296,7 +296,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
mutex_lock(&wb->flush_lock);
return bch2_trans_run(c,
- __bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
+ __bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true));
}
static inline u64 btree_write_buffer_ref(int idx)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c02c8c9..e7f4506 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -680,7 +680,7 @@ static int check_bucket_ref(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
- u16 bucket_sectors = !ptr->cached
+ u32 bucket_sectors = !ptr->cached
? dirty_sectors
: cached_sectors;
struct printbuf buf = PRINTBUF;
@@ -752,9 +752,9 @@ static int check_bucket_ref(struct btree_trans *trans,
goto err;
}
- if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
+ if ((u64) bucket_sectors + sectors > U32_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
- "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
@@ -1201,7 +1201,7 @@ not_found:
new->k.p = bkey_start_pos(p.k);
new->k.p.offset += *idx - start;
bch2_key_resize(&new->k, next_idx - *idx);
- ret = __bch2_btree_insert(trans, BTREE_ID_extents, &new->k_i,
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i,
BTREE_TRIGGER_NORUN);
}
@@ -1300,7 +1300,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
static int warned_disk_usage = 0;
bool warn = false;
unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
- struct replicas_delta *d = deltas->d, *d2;
+ struct replicas_delta *d, *d2;
struct replicas_delta *top = (void *) deltas->d + deltas->used;
struct bch_fs_usage *dst;
s64 added = 0, should_not_have_added;
@@ -1923,7 +1923,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
{
- int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
+ int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca));
if (ret)
bch_err_fn(c, ret);
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index f192809..ecbeb72 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -40,15 +40,42 @@ static inline size_t sector_to_bucket_and_offset(const struct bch_dev *ca, secto
for (_b = (_buckets)->b + (_buckets)->first_bucket; \
_b < (_buckets)->b + (_buckets)->nbuckets; _b++)
+/*
+ * Ugly hack alert:
+ *
+ * We need to cram a spinlock in a single byte, because that's what we have left
+ * in struct bucket, and we care about the size of these - during fsck, we need
+ * in memory state for every single bucket on every device.
+ *
+ * We used to do
+ * while (xchg(&b->lock, 1) cpu_relax();
+ * but, it turns out not all architectures support xchg on a single byte.
+ *
+ * So now we use bit_spin_lock(), with fun games since we can't burn a whole
+ * ulong for this - we just need to make sure the lock bit always ends up in the
+ * first byte.
+ */
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define BUCKET_LOCK_BITNR 0
+#else
+#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
+#endif
+
+union ulong_byte_assert {
+ ulong ulong;
+ u8 byte;
+};
+
static inline void bucket_unlock(struct bucket *b)
{
- smp_store_release(&b->lock, 0);
+ BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
+ bit_spin_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock);
}
static inline void bucket_lock(struct bucket *b)
{
- while (xchg(&b->lock, 1))
- cpu_relax();
+ bit_spin_lock(BUCKET_LOCK_BITNR, (void *) &b->lock);
}
static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
@@ -180,7 +207,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
switch (watermark) {
case BCH_WATERMARK_NR:
- unreachable();
+ BUG();
case BCH_WATERMARK_stripe:
reserved += ca->mi.nbuckets >> 6;
fallthrough;
diff --git a/libbcachefs/buckets_waiting_for_journal.c b/libbcachefs/buckets_waiting_for_journal.c
index 81ab685..ec1b636 100644
--- a/libbcachefs/buckets_waiting_for_journal.c
+++ b/libbcachefs/buckets_waiting_for_journal.c
@@ -133,7 +133,7 @@ retry_rehash:
b->t = n;
kvfree(t);
- pr_debug("took %zu rehashes, table at %zu/%zu elements",
+ pr_debug("took %zu rehashes, table at %zu/%lu elements",
nr_rehashes, nr_elements, 1UL << b->t->bits);
out:
mutex_unlock(&b->lock);
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
index fb603df..f69e15d 100644
--- a/libbcachefs/chardev.c
+++ b/libbcachefs/chardev.c
@@ -86,10 +86,9 @@ static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
devs[i] = strndup_user((const char __user *)(unsigned long)
user_devs[i],
PATH_MAX);
- if (!devs[i]) {
- ret = -ENOMEM;
+ ret= PTR_ERR_OR_ZERO(devs[i]);
+ if (ret)
goto err;
- }
}
c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
@@ -117,8 +116,9 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
return -EINVAL;
path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
- if (!path)
- return -ENOMEM;
+ ret = PTR_ERR_OR_ZERO(path);
+ if (ret)
+ return ret;
err = bch2_fs_open_incremental(path);
kfree(path);
@@ -149,9 +149,10 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
static long bch2_ioctl_query_uuid(struct bch_fs *c,
struct bch_ioctl_query_uuid __user *user_arg)
{
- return copy_to_user(&user_arg->uuid,
- &c->sb.user_uuid,
- sizeof(c->sb.user_uuid));
+ if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid,
+ sizeof(c->sb.user_uuid)))
+ return -EFAULT;
+ return 0;
}
#if 0
@@ -188,8 +189,9 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
return -EINVAL;
path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
- if (!path)
- return -ENOMEM;
+ ret = PTR_ERR_OR_ZERO(path);
+ if (ret)
+ return ret;
ret = bch2_dev_add(c, path);
kfree(path);
@@ -230,8 +232,9 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
return -EINVAL;
path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
- if (!path)
- return -ENOMEM;
+ ret = PTR_ERR_OR_ZERO(path);
+ if (ret)
+ return ret;
ret = bch2_dev_online(c, path);
kfree(path);
@@ -338,7 +341,10 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
if (len < sizeof(e))
return -EINVAL;
- return copy_to_user(buf, &e, sizeof(e)) ?: sizeof(e);
+ if (copy_to_user(buf, &e, sizeof(e)))
+ return -EFAULT;
+
+ return sizeof(e);
}
static const struct file_operations bcachefs_data_ops = {
@@ -417,7 +423,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
return -EFAULT;
- arg = kzalloc(sizeof(*arg) + replica_entries_bytes, GFP_KERNEL);
+ arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
if (!arg)
return -ENOMEM;
@@ -466,9 +472,11 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
percpu_up_read(&c->mark_lock);
kfree(src);
- if (!ret)
- ret = copy_to_user(user_arg, arg,
- sizeof(*arg) + arg->replica_entries_bytes);
+ if (ret)
+ goto err;
+ if (copy_to_user(user_arg, arg,
+ sizeof(*arg) + arg->replica_entries_bytes))
+ ret = -EFAULT;
err:
kfree(arg);
return ret;
@@ -513,7 +521,10 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
percpu_ref_put(&ca->ref);
- return copy_to_user(user_arg, &arg, sizeof(arg));
+ if (copy_to_user(user_arg, &arg, sizeof(arg)))
+ return -EFAULT;
+
+ return 0;
}
static long bch2_ioctl_read_super(struct bch_fs *c,
@@ -550,8 +561,9 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
goto err;
}
- ret = copy_to_user((void __user *)(unsigned long)arg.sb,
- sb, vstruct_bytes(sb));
+ if (copy_to_user((void __user *)(unsigned long)arg.sb, sb,
+ vstruct_bytes(sb)))
+ ret = -EFAULT;
err:
if (!IS_ERR_OR_NULL(ca))
percpu_ref_put(&ca->ref);
@@ -617,6 +629,9 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
arg.pad)
return -EINVAL;
+ if (arg.nbuckets > U32_MAX)
+ return -EINVAL;
+
ca = bch2_device_lookup(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index 4c87c59..1948119 100644
--- a/libbcachefs/checksum.c
+++ b/libbcachefs/checksum.c
@@ -139,7 +139,7 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
for (i = 0; i < pages; i++) {
unsigned offset = offset_in_page(buf);
- unsigned pg_len = min(len, PAGE_SIZE - offset);
+ unsigned pg_len = min_t(size_t, len, PAGE_SIZE - offset);
sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset);
buf += pg_len;
@@ -159,15 +159,16 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
crypto_alloc_sync_skcipher("chacha20", 0, 0);
int ret;
- if (!chacha20) {
- pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20));
- return PTR_ERR(chacha20);
+ ret = PTR_ERR_OR_ZERO(chacha20);
+ if (ret) {
+ pr_err("error requesting chacha20 cipher: %s", bch2_err_str(ret));
+ return ret;
}
ret = crypto_skcipher_setkey(&chacha20->base,
(void *) key, sizeof(*key));
if (ret) {
- pr_err("crypto_skcipher_setkey() error: %i", ret);
+ pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret));
goto err;
}
@@ -366,11 +367,11 @@ struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
BUG_ON(!bch2_checksum_mergeable(type));
while (b_len) {
- unsigned b = min_t(unsigned, b_len, PAGE_SIZE);
+ unsigned page_len = min_t(unsigned, b_len, PAGE_SIZE);
bch2_checksum_update(&state,
- page_address(ZERO_PAGE(0)), b);
- b_len -= b;
+ page_address(ZERO_PAGE(0)), page_len);
+ b_len -= page_len;
}
a.lo = (__le64 __force) bch2_checksum_final(&state);
a.lo ^= b.lo;
@@ -395,9 +396,9 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
unsigned csum_type;
struct bch_csum csum;
} splits[3] = {
- { crc_a, len_a, new_csum_type },
- { crc_b, len_b, new_csum_type },
- { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type },
+ { crc_a, len_a, new_csum_type, { 0 }},
+ { crc_b, len_b, new_csum_type, { 0 } },
+ { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type, { 0 } },
}, *i;
bool mergeable = crc_old.csum_type == new_csum_type &&
bch2_checksum_mergeable(new_csum_type);
@@ -558,6 +559,7 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
return ret;
}
+#ifndef __KERNEL__
int bch2_revoke_key(struct bch_sb *sb)
{
key_serial_t key_id;
@@ -575,6 +577,7 @@ int bch2_revoke_key(struct bch_sb *sb)
return 0;
}
+#endif
int bch2_decrypt_sb_key(struct bch_fs *c,
struct bch_sb_field_crypt *crypt,
@@ -596,7 +599,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
/* decrypt real key: */
ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
- &sb_key, sizeof(sb_key));
+ &sb_key, sizeof(sb_key));
if (ret)
goto err;
diff --git a/libbcachefs/checksum.h b/libbcachefs/checksum.h
index 9a4898d..1399838 100644
--- a/libbcachefs/checksum.h
+++ b/libbcachefs/checksum.h
@@ -40,15 +40,16 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
*/
#define csum_vstruct(_c, _type, _nonce, _i) \
({ \
- const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \
- const void *end = vstruct_end(_i); \
+ const void *_start = ((const void *) (_i)) + sizeof((_i)->csum);\
\
- bch2_checksum(_c, _type, _nonce, start, end - start); \
+ bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
})
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
int bch2_request_key(struct bch_sb *, struct bch_key *);
+#ifndef __KERNEL__
int bch2_revoke_key(struct bch_sb *);
+#endif
int bch2_encrypt(struct bch_fs *, unsigned, struct nonce,
void *data, size_t);
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index 6b17f7c..1480b64 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -3,7 +3,6 @@
#include "checksum.h"
#include "compress.h"
#include "extents.h"
-#include "io.h"
#include "super-io.h"
#include <linux/lz4.h>
@@ -571,7 +570,6 @@ void bch2_fs_compress_exit(struct bch_fs *c)
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t decompress_workspace_size = 0;
- bool decompress_workspace_needed;
ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
c->opts.encoded_extent_max);
struct {
@@ -581,7 +579,8 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
size_t decompress_workspace;
} compression_types[] = {
{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
- max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) },
+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
+ 0 },
{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
@@ -620,9 +619,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
if (!(features & (1 << i->feature)))
continue;
- if (i->decompress_workspace)
- decompress_workspace_needed = true;
-
if (mempool_initialized(&c->compress_workspace[i->type]))
continue;
diff --git a/libbcachefs/counters.c b/libbcachefs/counters.c
index 442a9b8..26eb3d8 100644
--- a/libbcachefs/counters.c
+++ b/libbcachefs/counters.c
@@ -43,7 +43,7 @@ static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
prt_tab(out);
prt_printf(out, "%llu", le64_to_cpu(ctrs->d[i]));
prt_newline(out);
- };
+ }
};
int bch2_sb_counters_to_cpu(struct bch_fs *c)
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 81518f2..899ff46 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -9,7 +9,7 @@
#include "ec.h"
#include "error.h"
#include "extents.h"
-#include "io.h"
+#include "io_write.h"
#include "keylist.h"
#include "move.h"
#include "nocow_locking.h"
@@ -49,10 +49,6 @@ static void trace_move_extent_fail2(struct data_update *m,
if (insert) {
i = 0;
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
- struct bkey_s new_s;
- new_s.k = (void *) new.k;
- new_s.v = (void *) new.v;
-
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
!ptr->cached)
@@ -307,7 +303,7 @@ out:
int bch2_data_update_index_update(struct bch_write_op *op)
{
- return bch2_trans_run(op->c, __bch2_data_update_index_update(&trans, op));
+ return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
}
void bch2_data_update_read_done(struct data_update *m,
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index 49e9055..7ca1f98 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -4,7 +4,7 @@
#define _BCACHEFS_DATA_UPDATE_H
#include "bkey_buf.h"
-#include "io_types.h"
+#include "io_write_types.h"
struct moving_context;
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index ae47e18..75a3dc7 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -19,7 +19,6 @@
#include "extents.h"
#include "fsck.h"
#include "inode.h"
-#include "io.h"
#include "super.h"
#include <linux/console.h>
@@ -154,10 +153,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
BUG_ON(b->nsets != 1);
for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k))
- if (k->type == KEY_TYPE_btree_ptr_v2) {
- struct bch_btree_ptr_v2 *v = (void *) bkeyp_val(&b->format, k);
- v->mem_ptr = 0;
- }
+ if (k->type == KEY_TYPE_btree_ptr_v2)
+ ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0;
v = c->verify_data;
bkey_copy(&v->key, &b->key);
@@ -322,16 +319,16 @@ static ssize_t flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
- int err = copy_to_user(i->ubuf, i->buf.buf, bytes);
+ int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes);
- if (err)
- return err;
+ i->ret += copied;
+ i->ubuf += copied;
+ i->size -= copied;
+ i->buf.pos -= copied;
+ memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos);
- i->ret += bytes;
- i->ubuf += bytes;
- i->size -= bytes;
- i->buf.pos -= bytes;
- memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos);
+ if (copied != bytes)
+ return -EFAULT;
}
return i->size ? 0 : i->ret;
@@ -369,7 +366,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
ssize_t ret;
@@ -382,17 +379,17 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
if (ret)
return ret;
- bch2_trans_init(&trans, i->c, 0, 0);
- ret = for_each_btree_key2(&trans, iter, i->id, i->from,
+ trans = bch2_trans_get(i->c);
+ ret = for_each_btree_key2(trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
bch2_bkey_val_to_text(&i->buf, i->c, k);
prt_newline(&i->buf);
- drop_locks_do(&trans, flush_buf(i));
+ drop_locks_do(trans, flush_buf(i));
}));
i->from = iter.pos;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (!ret)
ret = flush_buf(i);
@@ -411,7 +408,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct btree *b;
ssize_t ret;
@@ -427,26 +424,26 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
if (bpos_eq(SPOS_MAX, i->from))
return i->ret;
- bch2_trans_init(&trans, i->c, 0, 0);
+ trans = bch2_trans_get(i->c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) {
+ for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) {
bch2_btree_node_to_text(&i->buf, i->c, b);
i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
? bpos_successor(b->key.k.p)
: b->key.k.p;
- ret = drop_locks_do(&trans, flush_buf(i));
+ ret = drop_locks_do(trans, flush_buf(i));
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (!ret)
ret = flush_buf(i);
@@ -465,7 +462,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
ssize_t ret;
@@ -478,9 +475,9 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
if (ret)
return ret;
- bch2_trans_init(&trans, i->c, 0, 0);
+ trans = bch2_trans_get(i->c);
- ret = for_each_btree_key2(&trans, iter, i->id, i->from,
+ ret = for_each_btree_key2(trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct btree_path_level *l = &iter.path->l[0];
@@ -493,11 +490,11 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
}
bch2_bfloat_to_text(&i->buf, l->b, _k);
- drop_locks_do(&trans, flush_buf(i));
+ drop_locks_do(trans, flush_buf(i));
}));
i->from = iter.pos;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (!ret)
ret = flush_buf(i);
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index a7559ab..6c6c8d5 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -479,21 +479,19 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
const struct bch_hash_info *hash_info,
const struct qstr *name, subvol_inum *inum)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
int ret;
-
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
+ ret = __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info,
name, inum, 0);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret)
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
@@ -522,7 +520,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_dirent dirent;
@@ -533,15 +531,14 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
int ret;
bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
- for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
SPOS(inum.inum, ctx->pos, snapshot),
POS(inum.inum, U64_MAX), 0, k, ret) {
if (k.k->type != KEY_TYPE_dirent)
@@ -549,7 +546,7 @@ retry:
dirent = bkey_s_c_to_dirent(k);
- ret = bch2_dirent_read_target(&trans, inum, dirent, &target);
+ ret = bch2_dirent_read_target(trans, inum, dirent, &target);
if (ret < 0)
break;
if (ret)
@@ -558,7 +555,7 @@ retry:
/* dir_emit() can fault and block: */
bch2_bkey_buf_reassemble(&sk, c, k);
dirent = bkey_i_to_s_c_dirent(sk.k);
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
name = bch2_dirent_get_name(dirent);
@@ -574,16 +571,16 @@ retry:
* read_target looks up subvolumes, we can overflow paths if the
* directory has many subvolumes in it
*/
- ret = btree_trans_too_many_iters(&trans);
+ ret = btree_trans_too_many_iters(trans);
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;
diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c
index f36472c..b292dbe 100644
--- a/libbcachefs/disk_groups.c
+++ b/libbcachefs/disk_groups.c
@@ -32,21 +32,21 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb,
for (i = 0; i < sb->nr_devices; i++) {
struct bch_member *m = mi->members + i;
- unsigned g;
+ unsigned group_id;
if (!BCH_MEMBER_GROUP(m))
continue;
- g = BCH_MEMBER_GROUP(m) - 1;
+ group_id = BCH_MEMBER_GROUP(m) - 1;
- if (g >= nr_groups) {
+ if (group_id >= nr_groups) {
prt_printf(err, "disk %u has invalid label %u (have %u)",
- i, g, nr_groups);
+ i, group_id, nr_groups);
return -BCH_ERR_invalid_sb_disk_groups;
}
- if (BCH_GROUP_DELETED(&groups->entries[g])) {
- prt_printf(err, "disk %u has deleted label %u", i, g);
+ if (BCH_GROUP_DELETED(&groups->entries[group_id])) {
+ prt_printf(err, "disk %u has deleted label %u", i, group_id);
return -BCH_ERR_invalid_sb_disk_groups;
}
}
@@ -183,8 +183,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
struct bch_member *m = mi->members + i;
- struct bch_disk_group_cpu *dst =
- &cpu_g->entries[BCH_MEMBER_GROUP(m)];
+ struct bch_disk_group_cpu *dst;
if (!bch2_member_exists(m))
continue;
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index f58e84a..8646856 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -11,10 +11,11 @@
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
+#include "checksum.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
-#include "io.h"
+#include "io_read.h"
#include "keylist.h"
#include "recovery.h"
#include "replicas.h"
@@ -475,7 +476,7 @@ err:
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
- return bch2_trans_run(c, get_stripe_key_trans(&trans, idx, stripe));
+ return bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe));
}
/* recovery read path: */
@@ -787,12 +788,10 @@ static void ec_stripe_delete_work(struct work_struct *work)
{
struct bch_fs *c =
container_of(work, struct bch_fs, ec_stripe_delete_work);
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
int ret;
u64 idx;
- bch2_trans_init(&trans, c, 0, 0);
-
while (1) {
mutex_lock(&c->ec_stripes_heap_lock);
idx = stripe_idx_to_delete(c);
@@ -801,15 +800,15 @@ static void ec_stripe_delete_work(struct work_struct *work)
if (!idx)
break;
- ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
- ec_stripe_delete(&trans, idx));
+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ ec_stripe_delete(trans, idx));
if (ret) {
bch_err_fn(c, ret);
break;
}
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
}
@@ -998,24 +997,22 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
- ret = bch2_btree_write_buffer_flush(&trans);
+ ret = bch2_btree_write_buffer_flush(trans);
if (ret)
goto err;
for (i = 0; i < nr_data; i++) {
- ret = ec_stripe_update_bucket(&trans, s, i);
+ ret = ec_stripe_update_bucket(trans, s, i);
if (ret)
break;
}
err:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -1123,7 +1120,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
ret = bch2_trans_do(c, &s->res, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL,
- ec_stripe_key_update(&trans,
+ ec_stripe_key_update(trans,
bkey_i_to_stripe(&s->new_stripe.key),
!s->have_existing_stripe));
if (ret) {
@@ -1133,8 +1130,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
ret = ec_stripe_update_extents(c, &s->new_stripe);
if (ret) {
- bch_err(c, "error creating stripe: error updating pointers: %s",
- bch2_err_str(ret));
+ bch_err_msg(c, ret, "creating stripe: error updating pointers");
goto err;
}
err:
@@ -1822,7 +1818,7 @@ void bch2_fs_ec_flush(struct bch_fs *c)
int bch2_stripes_read(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
const struct bch_stripe *s;
@@ -1830,9 +1826,7 @@ int bch2_stripes_read(struct bch_fs *c)
unsigned i;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
if (k.k->type != KEY_TYPE_stripe)
continue;
@@ -1855,9 +1849,9 @@ int bch2_stripes_read(struct bch_fs *c)
bch2_stripes_heap_insert(c, m, k.k->p.offset);
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 885ae5d..966d165 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -240,7 +240,7 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
bch2_ec_do_stripe_creates(c);
break;
default:
- unreachable();
+ BUG();
}
}
diff --git a/libbcachefs/errcode.c b/libbcachefs/errcode.c
index dc906fc..d260ff9 100644
--- a/libbcachefs/errcode.c
+++ b/libbcachefs/errcode.c
@@ -12,8 +12,6 @@ static const char * const bch2_errcode_strs[] = {
NULL
};
-#define BCH_ERR_0 0
-
static unsigned bch2_errcode_parents[] = {
#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class,
BCH_ERRCODES()
@@ -61,3 +59,10 @@ int __bch2_err_class(int err)
return -err;
}
+
+const char *bch2_blk_status_to_str(blk_status_t status)
+{
+ if (status == BLK_STS_REMOVED)
+ return "device removed";
+ return blk_status_to_str(status);
+}
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index f7fa874..64f7176 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -99,6 +99,7 @@
x(ENOENT, ENOENT_str_hash_set_must_replace) \
x(ENOENT, ENOENT_inode) \
x(ENOENT, ENOENT_not_subvol) \
+ x(ENOENT, ENOENT_not_directory) \
x(ENOENT, ENOENT_directory_dead) \
x(ENOENT, ENOENT_subvolume) \
x(ENOENT, ENOENT_snapshot_tree) \
@@ -218,7 +219,14 @@
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_bad_node) \
- x(BCH_ERR_btree_node_read_err, btree_node_read_err_incompatible)
+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_incompatible) \
+ x(0, nopromote) \
+ x(BCH_ERR_nopromote, nopromote_may_not) \
+ x(BCH_ERR_nopromote, nopromote_already_promoted) \
+ x(BCH_ERR_nopromote, nopromote_unwritten) \
+ x(BCH_ERR_nopromote, nopromote_congested) \
+ x(BCH_ERR_nopromote, nopromote_in_flight) \
+ x(BCH_ERR_nopromote, nopromote_enomem)
enum bch_errcode {
BCH_ERR_START = 2048,
@@ -249,4 +257,8 @@ static inline long bch2_err_class(long err)
return err < 0 ? __bch2_err_class(err) : err;
}
+#define BLK_STS_REMOVED ((__force blk_status_t)128)
+
+const char *bch2_blk_status_to_str(blk_status_t);
+
#endif /* _BCACHFES_ERRCODE_H */
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 39009cf..2a5af88 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
-#include "io.h"
#include "super.h"
#define FSCK_ERR_RATELIMIT_NR 10
diff --git a/libbcachefs/fs-io-buffered.c b/libbcachefs/fs-io-buffered.c
index cbfb5b2..58ccc7b 100644
--- a/libbcachefs/fs-io-buffered.c
+++ b/libbcachefs/fs-io-buffered.c
@@ -8,7 +8,8 @@
#include "fs-io-buffered.h"
#include "fs-io-direct.h"
#include "fs-io-pagecache.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include <linux/backing-dev.h>
#include <linux/pagemap.h>
@@ -269,7 +270,7 @@ void bch2_readahead(struct readahead_control *ractl)
struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct folio *folio;
struct readpages_iter readpages_iter;
int ret;
@@ -279,8 +280,6 @@ void bch2_readahead(struct readahead_control *ractl)
ret = readpages_iter_init(&readpages_iter, ractl);
BUG_ON(ret);
- bch2_trans_init(&trans, c, 0, 0);
-
bch2_pagecache_add_get(inode);
while ((folio = readpage_iter_peek(&readpages_iter))) {
@@ -299,31 +298,27 @@ void bch2_readahead(struct readahead_control *ractl)
rbio->bio.bi_end_io = bch2_readpages_end_io;
BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
- bchfs_read(&trans, rbio, inode_inum(inode),
+ bchfs_read(trans, rbio, inode_inum(inode),
&readpages_iter);
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
}
bch2_pagecache_add_put(inode);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
darray_exit(&readpages_iter.folios);
}
static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio,
subvol_inum inum, struct folio *folio)
{
- struct btree_trans trans;
-
bch2_folio_create(folio, __GFP_NOFAIL);
rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
rbio->bio.bi_iter.bi_sector = folio_sector(folio);
BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
- bch2_trans_init(&trans, c, 0, 0);
- bchfs_read(&trans, rbio, inum, NULL);
- bch2_trans_exit(&trans);
+ bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0));
}
static void bch2_read_single_folio_end_io(struct bio *bio)
@@ -694,12 +689,12 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
if (IS_ERR_OR_NULL(folio))
goto err_unlock;
- if (folio_test_uptodate(folio))
- goto out;
-
offset = pos - folio_pos(folio);
len = min_t(size_t, len, folio_end_pos(folio) - pos);
+ if (folio_test_uptodate(folio))
+ goto out;
+
/* If we're writing entire folio, don't need to read it in first: */
if (!offset && len == folio_size(folio))
goto out;
@@ -800,10 +795,10 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
return copied;
}
-static noinline void folios_trunc(folios *folios, struct folio **fi)
+static noinline void folios_trunc(folios *fs, struct folio **fi)
{
- while (folios->data + folios->nr > fi) {
- struct folio *f = darray_pop(folios);
+ while (fs->data + fs->nr > fi) {
+ struct folio *f = darray_pop(fs);
folio_unlock(f);
folio_put(f);
@@ -817,35 +812,35 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
- folios folios;
+ folios fs;
struct folio **fi, *f;
- unsigned copied = 0, f_offset;
- u64 end = pos + len, f_pos;
+ unsigned copied = 0, f_offset, f_copied;
+ u64 end = pos + len, f_pos, f_len;
loff_t last_folio_pos = inode->v.i_size;
int ret = 0;
BUG_ON(!len);
bch2_folio_reservation_init(c, inode, &res);
- darray_init(&folios);
+ darray_init(&fs);
ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT,
mapping_gfp_mask(mapping),
- &folios);
+ &fs);
if (ret)
goto out;
- BUG_ON(!folios.nr);
+ BUG_ON(!fs.nr);
- f = darray_first(folios);
+ f = darray_first(fs);
if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
if (ret)
goto out;
}
- f = darray_last(folios);
+ f = darray_last(fs);
end = min(end, folio_end_pos(f));
last_folio_pos = folio_pos(f);
if (end != folio_end_pos(f) && !folio_test_uptodate(f)) {
@@ -858,15 +853,15 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
}
}
- ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr);
+ ret = bch2_folio_set(c, inode_inum(inode), fs.data, fs.nr);
if (ret)
goto out;
f_pos = pos;
- f_offset = pos - folio_pos(darray_first(folios));
- darray_for_each(folios, fi) {
- struct folio *f = *fi;
- u64 f_len = min(end, folio_end_pos(f)) - f_pos;
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
/*
* XXX: per POSIX and fstests generic/275, on -ENOSPC we're
@@ -878,11 +873,11 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
*/
ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
if (unlikely(ret)) {
- folios_trunc(&folios, fi);
- if (!folios.nr)
+ folios_trunc(&fs, fi);
+ if (!fs.nr)
goto out;
- end = min(end, folio_end_pos(darray_last(folios)));
+ end = min(end, folio_end_pos(darray_last(fs)));
break;
}
@@ -891,18 +886,17 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
}
if (mapping_writably_mapped(mapping))
- darray_for_each(folios, fi)
+ darray_for_each(fs, fi)
flush_dcache_folio(*fi);
f_pos = pos;
- f_offset = pos - folio_pos(darray_first(folios));
- darray_for_each(folios, fi) {
- struct folio *f = *fi;
- u64 f_len = min(end, folio_end_pos(f)) - f_pos;
- unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
-
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
+ f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
if (!f_copied) {
- folios_trunc(&folios, fi);
+ folios_trunc(&fs, fi);
break;
}
@@ -911,7 +905,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
pos + copied + f_copied < inode->v.i_size) {
iov_iter_revert(iter, f_copied);
folio_zero_range(f, 0, folio_size(f));
- folios_trunc(&folios, fi);
+ folios_trunc(&fs, fi);
break;
}
@@ -919,7 +913,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
copied += f_copied;
if (f_copied != f_len) {
- folios_trunc(&folios, fi + 1);
+ folios_trunc(&fs, fi + 1);
break;
}
@@ -938,10 +932,10 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
spin_unlock(&inode->v.i_lock);
f_pos = pos;
- f_offset = pos - folio_pos(darray_first(folios));
- darray_for_each(folios, fi) {
- struct folio *f = *fi;
- u64 f_len = min(end, folio_end_pos(f)) - f_pos;
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
if (!folio_test_uptodate(f))
folio_mark_uptodate(f);
@@ -954,7 +948,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
inode->ei_last_dirtied = (unsigned long) current;
out:
- darray_for_each(folios, fi) {
+ darray_for_each(fs, fi) {
folio_unlock(*fi);
folio_put(*fi);
}
@@ -967,7 +961,7 @@ out:
if (last_folio_pos >= inode->v.i_size)
truncate_pagecache(&inode->v, inode->v.i_size);
- darray_exit(&folios);
+ darray_exit(&fs);
bch2_folio_reservation_put(c, inode, &res);
return copied ?: ret;
@@ -1055,8 +1049,6 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(&inode->v);
inode_lock(&inode->v);
ret = generic_write_checks(iocb, from);
@@ -1076,7 +1068,6 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->ki_pos += ret;
unlock:
inode_unlock(&inode->v);
- current->backing_dev_info = NULL;
if (ret > 0)
ret = generic_write_sync(iocb, ret);
diff --git a/libbcachefs/fs-io-direct.c b/libbcachefs/fs-io-direct.c
index 2b29abd..6a9557e 100644
--- a/libbcachefs/fs-io-direct.c
+++ b/libbcachefs/fs-io-direct.c
@@ -7,10 +7,12 @@
#include "fs-io.h"
#include "fs-io-direct.h"
#include "fs-io-pagecache.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include <linux/kthread.h>
#include <linux/pagemap.h>
+#include <linux/prefetch.h>
#include <linux/task_io_accounting_ops.h>
/* O_DIRECT reads */
@@ -232,23 +234,21 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum,
u64 offset, u64 size,
unsigned nr_replicas, bool compressed)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
u64 end = offset + size;
u32 snapshot;
bool ret = true;
int err;
-
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+ err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (err)
goto err;
- for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inum.inum, offset, snapshot),
BTREE_ITER_SLOTS, k, err) {
if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end)))
@@ -263,11 +263,11 @@ retry:
}
offset = iter.pos.offset;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(err, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return err ? false : ret;
}
diff --git a/libbcachefs/fs-io-pagecache.c b/libbcachefs/fs-io-pagecache.c
index 1e60eea..8bd9bcd 100644
--- a/libbcachefs/fs-io-pagecache.c
+++ b/libbcachefs/fs-io-pagecache.c
@@ -14,7 +14,7 @@
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
loff_t start, u64 end,
int fgp_flags, gfp_t gfp,
- folios *folios)
+ folios *fs)
{
struct folio *f;
u64 pos = start;
@@ -24,7 +24,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
if ((u64) pos >= (u64) start + (1ULL << 20))
fgp_flags &= ~FGP_CREAT;
- ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL);
+ ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
if (ret)
break;
@@ -32,16 +32,16 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
if (IS_ERR_OR_NULL(f))
break;
- BUG_ON(folios->nr && folio_pos(f) != pos);
+ BUG_ON(fs->nr && folio_pos(f) != pos);
pos = folio_end_pos(f);
- darray_push(folios, f);
+ darray_push(fs, f);
}
- if (!folios->nr && !ret && (fgp_flags & FGP_CREAT))
+ if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
ret = -ENOMEM;
- return folios->nr ? 0 : ret;
+ return fs->nr ? 0 : ret;
}
/* pagecache_block must be held */
@@ -73,12 +73,15 @@ int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
return ret;
}
+#if 0
+/* Useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(n) #n,
BCH_FOLIO_SECTOR_STATE()
#undef x
NULL
};
+#endif
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
@@ -177,20 +180,20 @@ static void __bch2_folio_set(struct folio *folio,
* extents btree:
*/
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
- struct folio **folios, unsigned nr_folios)
+ struct folio **fs, unsigned nr_folios)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_folio *s;
- u64 offset = folio_sector(folios[0]);
+ u64 offset = folio_sector(fs[0]);
unsigned folio_idx;
u32 snapshot;
bool need_set = false;
int ret;
for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
- s = bch2_folio_create(folios[folio_idx], GFP_KERNEL);
+ s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
if (!s)
return -ENOMEM;
@@ -201,22 +204,22 @@ int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
return 0;
folio_idx = 0;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
- for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inum.inum, offset, snapshot),
BTREE_ITER_SLOTS, k, ret) {
unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
unsigned state = bkey_to_sector_state(k);
while (folio_idx < nr_folios) {
- struct folio *folio = folios[folio_idx];
+ struct folio *folio = fs[folio_idx];
u64 folio_start = folio_sector(folio);
u64 folio_end = folio_end_sector(folio);
unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
@@ -240,11 +243,11 @@ retry:
}
offset = iter.pos.offset;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 4804e5a..b0e8144 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
@@ -16,7 +17,7 @@
#include "fsck.h"
#include "inode.h"
#include "journal.h"
-#include "io.h"
+#include "io_misc.h"
#include "keylist.h"
#include "quota.h"
#include "reflink.h"
@@ -164,7 +165,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
#endif
}
-
/* fsync: */
/*
@@ -207,31 +207,29 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol,
struct bpos start,
struct bpos end)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
-
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot);
if (ret)
goto err;
- for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) {
ret = 1;
break;
}
start = iter.pos;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -241,8 +239,8 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct bch_folio *s;
- unsigned start_offset = start & (PAGE_SIZE - 1);
- unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1;
+ unsigned start_offset;
+ unsigned end_offset;
unsigned i;
struct folio *folio;
s64 i_sectors_delta = 0;
@@ -391,33 +389,12 @@ static int bch2_extend(struct mnt_idmap *idmap,
return bch2_setattr_nonsize(idmap, inode, iattr);
}
-static int bch2_truncate_finish_fn(struct btree_trans *trans,
- struct bch_inode_info *inode,
- struct bch_inode_unpacked *bi,
- void *p)
-{
- bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
- return 0;
-}
-
-static int bch2_truncate_start_fn(struct btree_trans *trans,
- struct bch_inode_info *inode,
- struct bch_inode_unpacked *bi, void *p)
-{
- u64 *new_i_size = p;
-
- bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY;
- bi->bi_size = *new_i_size;
- return 0;
-}
-
-int bch2_truncate(struct mnt_idmap *idmap,
+int bchfs_truncate(struct mnt_idmap *idmap,
struct bch_inode_info *inode, struct iattr *iattr)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct bch_inode_unpacked inode_u;
- u64 new_i_size = iattr->ia_size;
s64 i_sectors_delta = 0;
int ret = 0;
@@ -466,6 +443,8 @@ int bch2_truncate(struct mnt_idmap *idmap,
if (unlikely(ret < 0))
goto err;
+ truncate_setsize(&inode->v, iattr->ia_size);
+
/*
* When extending, we're going to write the new i_size to disk
* immediately so we need to flush anything above the current on disk
@@ -487,32 +466,22 @@ int bch2_truncate(struct mnt_idmap *idmap,
if (ret)
goto err;
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_write_inode(c, inode, bch2_truncate_start_fn,
- &new_i_size, 0);
- mutex_unlock(&inode->ei_update_lock);
+ ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta);
+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ /*
+ * If we error here, VFS caches are now inconsistent with btree
+ */
+ set_bit(EI_INODE_ERROR, &inode->ei_flags);
goto err;
-
- truncate_setsize(&inode->v, iattr->ia_size);
-
- ret = bch2_fpunch(c, inode_inum(inode),
- round_up(iattr->ia_size, block_bytes(c)) >> 9,
- U64_MAX, &i_sectors_delta);
- bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+ }
bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal), c,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
- if (unlikely(ret))
- goto err;
-
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0);
- mutex_unlock(&inode->ei_update_lock);
ret = bch2_setattr_nonsize(idmap, inode, iattr);
err:
@@ -577,175 +546,33 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
- struct bkey_buf copy;
- struct btree_trans trans;
- struct btree_iter src, dst, del;
- loff_t shift, new_size;
- u64 src_start;
+ s64 i_sectors_delta = 0;
int ret = 0;
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
if (insert) {
- if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
- return -EFBIG;
-
if (offset >= inode->v.i_size)
return -EINVAL;
-
- src_start = U64_MAX;
- shift = len;
} else {
if (offset + len >= inode->v.i_size)
return -EINVAL;
-
- src_start = offset + len;
- shift = -len;
}
- new_size = inode->v.i_size + shift;
-
ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
if (ret)
return ret;
- if (insert) {
- i_size_write(&inode->v, new_size);
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_write_inode_size(c, inode, new_size,
- ATTR_MTIME|ATTR_CTIME);
- mutex_unlock(&inode->ei_update_lock);
- } else {
- s64 i_sectors_delta = 0;
-
- ret = bch2_fpunch(c, inode_inum(inode),
- offset >> 9, (offset + len) >> 9,
- &i_sectors_delta);
- bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-
- if (ret)
- return ret;
- }
-
- bch2_bkey_buf_init(&copy);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
- bch2_trans_iter_init(&trans, &src, BTREE_ID_extents,
- POS(inode->v.i_ino, src_start >> 9),
- BTREE_ITER_INTENT);
- bch2_trans_copy_iter(&dst, &src);
- bch2_trans_copy_iter(&del, &src);
-
- while (ret == 0 ||
- bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
- struct disk_reservation disk_res =
- bch2_disk_reservation_init(c, 0);
- struct bkey_i delete;
- struct bkey_s_c k;
- struct bpos next_pos;
- struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
- struct bpos atomic_end;
- unsigned trigger_flags = 0;
- u32 snapshot;
-
- bch2_trans_begin(&trans);
-
- ret = bch2_subvolume_get_snapshot(&trans,
- inode->ei_subvol, &snapshot);
- if (ret)
- continue;
-
- bch2_btree_iter_set_snapshot(&src, snapshot);
- bch2_btree_iter_set_snapshot(&dst, snapshot);
- bch2_btree_iter_set_snapshot(&del, snapshot);
-
- bch2_trans_begin(&trans);
-
- k = insert
- ? bch2_btree_iter_peek_prev(&src)
- : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX));
- if ((ret = bkey_err(k)))
- continue;
-
- if (!k.k || k.k->p.inode != inode->v.i_ino)
- break;
-
- if (insert &&
- bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9)))
- break;
-reassemble:
- bch2_bkey_buf_reassemble(&copy, c, k);
-
- if (insert &&
- bkey_lt(bkey_start_pos(k.k), move_pos))
- bch2_cut_front(move_pos, copy.k);
-
- copy.k->k.p.offset += shift >> 9;
- bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k));
-
- ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end);
- if (ret)
- continue;
-
- if (!bkey_eq(atomic_end, copy.k->k.p)) {
- if (insert) {
- move_pos = atomic_end;
- move_pos.offset -= shift >> 9;
- goto reassemble;
- } else {
- bch2_cut_back(atomic_end, copy.k);
- }
- }
-
- bkey_init(&delete.k);
- delete.k.p = copy.k->k.p;
- delete.k.size = copy.k->k.size;
- delete.k.p.offset -= shift >> 9;
- bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k));
-
- next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
-
- if (copy.k->k.size != k.k->size) {
- /* We might end up splitting compressed extents: */
- unsigned nr_ptrs =
- bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k));
-
- ret = bch2_disk_reservation_get(c, &disk_res,
- copy.k->k.size, nr_ptrs,
- BCH_DISK_RESERVATION_NOFAIL);
- BUG_ON(ret);
- }
+ if (insert)
+ i_size_write(&inode->v, inode->v.i_size + len);
- ret = bch2_btree_iter_traverse(&del) ?:
- bch2_trans_update(&trans, &del, &delete, trigger_flags) ?:
- bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?:
- bch2_trans_commit(&trans, &disk_res, NULL,
- BTREE_INSERT_NOFAIL);
- bch2_disk_reservation_put(c, &disk_res);
-
- if (!ret)
- bch2_btree_iter_set_pos(&src, next_pos);
- }
- bch2_trans_iter_exit(&trans, &del);
- bch2_trans_iter_exit(&trans, &dst);
- bch2_trans_iter_exit(&trans, &src);
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&copy, c);
-
- if (ret)
- return ret;
+ ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9,
+ insert, &i_sectors_delta);
+ if (!ret && !insert)
+ i_size_write(&inode->v, inode->v.i_size - len);
+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
- mutex_lock(&inode->ei_update_lock);
- if (!insert) {
- i_size_write(&inode->v, new_size);
- ret = bch2_write_inode_size(c, inode, new_size,
- ATTR_MTIME|ATTR_CTIME);
- } else {
- /* We need an inode update to update bi_journal_seq for fsync: */
- ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
- ATTR_MTIME|ATTR_CTIME);
- }
- mutex_unlock(&inode->ei_update_lock);
return ret;
}
@@ -753,16 +580,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
u64 start_sector, u64 end_sector)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bpos end_pos = POS(inode->v.i_ino, end_sector);
struct bch_io_opts opts;
int ret = 0;
bch2_inode_opts_get(&opts, c, &inode->ei_inode);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
POS(inode->v.i_ino, start_sector),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@@ -775,9 +601,9 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
u64 hole_start, hole_end;
u32 snapshot;
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans,
+ ret = bch2_subvolume_get_snapshot(trans,
inode->ei_subvol, &snapshot);
if (ret)
goto bkey_err;
@@ -814,7 +640,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
&hole_start,
&hole_end,
opts.data_replicas, true))
- ret = drop_locks_do(&trans,
+ ret = drop_locks_do(trans,
(bch2_clamp_data_hole(&inode->v,
&hole_start,
&hole_end,
@@ -837,7 +663,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
goto bkey_err;
}
- ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter,
+ ret = bch2_extent_fallocate(trans, inode_inum(inode), &iter,
sectors, opts, &i_sectors_delta,
writepoint_hashed((unsigned long) current));
if (ret)
@@ -845,7 +671,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
- drop_locks_do(&trans,
+ drop_locks_do(trans,
(bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
@@ -857,14 +683,14 @@ bkey_err:
struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0;
- bch2_fpunch_at(&trans, &iter, inode_inum(inode),
+ bch2_fpunch_at(trans, &iter, inode_inum(inode),
end_sector, &i_sectors_delta);
bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
bch2_quota_reservation_put(c, inode, &quota_res);
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
@@ -970,26 +796,24 @@ static int quota_reserve_range(struct bch_inode_info *inode,
u64 start, u64 end)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
u32 snapshot;
u64 sectors = end - start;
u64 pos = start;
int ret;
-
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
if (ret)
goto err;
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, pos, snapshot), 0);
- while (!(ret = btree_trans_too_many_iters(&trans)) &&
+ while (!(ret = btree_trans_too_many_iters(trans)) &&
(k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k &&
!(ret = bkey_err(k))) {
if (bkey_extent_is_allocation(k.k)) {
@@ -1001,17 +825,14 @@ retry:
bch2_btree_iter_advance(&iter);
}
pos = iter.pos.offset;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
-
- if (ret)
- return ret;
+ bch2_trans_put(trans);
- return bch2_quota_reservation_add(c, inode, res, sectors, true);
+ return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
}
loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
@@ -1104,7 +925,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
subvol_inum inum = inode_inum(inode);
@@ -1116,15 +937,15 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
- for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents,
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot),
POS(inode->v.i_ino, U64_MAX),
0, k, ret) {
@@ -1134,12 +955,12 @@ retry:
} else if (k.k->p.offset >> 9 > isize)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
return ret;
@@ -1157,7 +978,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
subvol_inum inum = inode_inum(inode);
@@ -1169,15 +990,15 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
- for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot),
BTREE_ITER_SLOTS, k, ret) {
if (k.k->p.inode != inode->v.i_ino) {
@@ -1195,12 +1016,12 @@ retry:
offset = max(offset, bkey_start_offset(k.k) << 9);
}
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
return ret;
diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h
index bb5b709..ca70346 100644
--- a/libbcachefs/fs-io.h
+++ b/libbcachefs/fs-io.h
@@ -6,7 +6,7 @@
#include "buckets.h"
#include "fs.h"
-#include "io_types.h"
+#include "io_write_types.h"
#include "quota.h"
#include <linux/uio.h>
@@ -165,7 +165,7 @@ int __must_check bch2_write_inode_size(struct bch_fs *,
int bch2_fsync(struct file *, loff_t, loff_t, int);
-int bch2_truncate(struct mnt_idmap *,
+int bchfs_truncate(struct mnt_idmap *,
struct bch_inode_info *, struct iattr *);
long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c
index 141bcce..0679b2f 100644
--- a/libbcachefs/fs-ioctl.c
+++ b/libbcachefs/fs-ioctl.c
@@ -122,7 +122,10 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
- return copy_to_user(arg, &fa, sizeof(fa));
+ if (copy_to_user(arg, &fa, sizeof(fa)))
+ return -EFAULT;
+
+ return 0;
}
static int fssetxattr_inode_update_fn(struct btree_trans *trans,
diff --git a/libbcachefs/fs-ioctl.h b/libbcachefs/fs-ioctl.h
index f201980..54a9c21 100644
--- a/libbcachefs/fs-ioctl.h
+++ b/libbcachefs/fs-ioctl.h
@@ -5,7 +5,7 @@
/* Inode flags: */
/* bcachefs inode flags -> vfs inode flags: */
-static const unsigned bch_flags_to_vfs[] = {
+static const __maybe_unused unsigned bch_flags_to_vfs[] = {
[__BCH_INODE_SYNC] = S_SYNC,
[__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
[__BCH_INODE_APPEND] = S_APPEND,
@@ -13,7 +13,7 @@ static const unsigned bch_flags_to_vfs[] = {
};
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
-static const unsigned bch_flags_to_uflags[] = {
+static const __maybe_unused unsigned bch_flags_to_uflags[] = {
[__BCH_INODE_SYNC] = FS_SYNC_FL,
[__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
[__BCH_INODE_APPEND] = FS_APPEND_FL,
@@ -22,7 +22,7 @@ static const unsigned bch_flags_to_uflags[] = {
};
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
-static const unsigned bch_flags_to_xflags[] = {
+static const __maybe_unused unsigned bch_flags_to_xflags[] = {
[__BCH_INODE_SYNC] = FS_XFLAG_SYNC,
[__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
[__BCH_INODE_APPEND] = FS_XFLAG_APPEND,
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 80dcda4..1354af2 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -19,7 +19,7 @@
#include "fs-io-pagecache.h"
#include "fsck.h"
#include "inode.h"
-#include "io.h"
+#include "io_read.h"
#include "journal.h"
#include "keylist.h"
#include "quota.h"
@@ -82,29 +82,27 @@ int __must_check bch2_write_inode(struct bch_fs *c,
inode_set_fn set,
void *p, unsigned fields)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bch_inode_unpacked inode_u;
int ret;
-
- bch2_trans_init(&trans, c, 0, 512);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT) ?:
- (set ? set(&trans, inode, &inode_u, p) : 0) ?:
- bch2_inode_write(&trans, &iter, &inode_u) ?:
- bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
+ (set ? set(trans, inode, &inode_u, p) : 0) ?:
+ bch2_inode_write(trans, &iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
/*
* the btree node lock protects inode->ei_inode, not ei_update_lock;
* this is important for inode updates via bchfs_write_index_update
*/
if (!ret)
- bch2_inode_update_after_write(&trans, inode, &inode_u, fields);
+ bch2_inode_update_after_write(trans, inode, &inode_u, fields);
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
@@ -114,7 +112,7 @@ retry:
inode_inum(inode).subvol,
inode_inum(inode).inum);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret < 0 ? ret : 0;
}
@@ -182,7 +180,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{
struct bch_inode_unpacked inode_u;
struct bch_inode_info *inode;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct bch_subvolume subvol;
int ret;
@@ -196,14 +194,14 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
if (!(inode->v.i_state & I_NEW))
return &inode->v;
- bch2_trans_init(&trans, c, 8, 0);
- ret = lockrestart_do(&trans,
- bch2_subvolume_get(&trans, inum.subvol, true, 0, &subvol) ?:
- bch2_inode_find_by_inum_trans(&trans, inum, &inode_u));
+ trans = bch2_trans_get(c);
+ ret = lockrestart_do(trans,
+ bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
+ bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
if (!ret)
- bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
- bch2_trans_exit(&trans);
+ bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
+ bch2_trans_put(trans);
if (ret) {
iget_failed(&inode->v);
@@ -226,7 +224,7 @@ __bch2_create(struct mnt_idmap *idmap,
unsigned flags)
{
struct bch_fs *c = dir->v.i_sb->s_fs_info;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct bch_inode_unpacked dir_u;
struct bch_inode_info *inode, *old;
struct bch_inode_unpacked inode_u;
@@ -256,13 +254,11 @@ __bch2_create(struct mnt_idmap *idmap,
if (!(flags & BCH_CREATE_TMPFILE))
mutex_lock(&dir->ei_update_lock);
- bch2_trans_init(&trans, c, 8,
- 2048 + (!(flags & BCH_CREATE_TMPFILE)
- ? dentry->d_name.len : 0));
+ trans = bch2_trans_get(c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_create_trans(&trans,
+ ret = bch2_create_trans(trans,
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
? &dentry->d_name : NULL,
@@ -278,9 +274,9 @@ retry:
inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
inum.inum = inode_u.bi_inum;
- ret = bch2_subvolume_get(&trans, inum.subvol, true,
+ ret = bch2_subvolume_get(trans, inum.subvol, true,
BTREE_ITER_WITH_UPDATES, &subvol) ?:
- bch2_trans_commit(&trans, NULL, &journal_seq, 0);
+ bch2_trans_commit(trans, NULL, &journal_seq, 0);
if (unlikely(ret)) {
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
KEY_TYPE_QUOTA_WARN);
@@ -291,13 +287,13 @@ err_before_quota:
}
if (!(flags & BCH_CREATE_TMPFILE)) {
- bch2_inode_update_after_write(&trans, dir, &dir_u,
+ bch2_inode_update_after_write(trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&dir->ei_update_lock);
}
bch2_iget5_set(&inode->v, &inum);
- bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
+ bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
@@ -337,7 +333,7 @@ err_before_quota:
unlock_new_inode(&inode->v);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
err:
posix_acl_release(default_acl);
posix_acl_release(acl);
@@ -346,7 +342,7 @@ err_trans:
if (!(flags & BCH_CREATE_TMPFILE))
mutex_unlock(&dir->ei_update_lock);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
make_bad_inode(&inode->v);
iput(&inode->v);
inode = ERR_PTR(ret);
@@ -401,26 +397,25 @@ static int __bch2_link(struct bch_fs *c,
struct bch_inode_info *dir,
struct dentry *dentry)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct bch_inode_unpacked dir_u, inode_u;
int ret;
mutex_lock(&inode->ei_update_lock);
- bch2_trans_init(&trans, c, 4, 1024);
- ret = commit_do(&trans, NULL, NULL, 0,
- bch2_link_trans(&trans,
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_link_trans(trans,
inode_inum(dir), &dir_u,
inode_inum(inode), &inode_u,
&dentry->d_name));
if (likely(!ret)) {
- bch2_inode_update_after_write(&trans, dir, &dir_u,
+ bch2_inode_update_after_write(trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
- bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
+ bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
mutex_unlock(&inode->ei_update_lock);
return ret;
}
@@ -451,24 +446,23 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
struct bch_inode_info *dir = to_bch_ei(vdir);
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_inode_unpacked dir_u, inode_u;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
int ret;
bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
- bch2_trans_init(&trans, c, 4, 1024);
- ret = commit_do(&trans, NULL, NULL,
+ ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL,
- bch2_unlink_trans(&trans,
+ bch2_unlink_trans(trans,
inode_inum(dir), &dir_u,
&inode_u, &dentry->d_name,
deleting_snapshot));
if (unlikely(ret))
goto err;
- bch2_inode_update_after_write(&trans, dir, &dir_u,
+ bch2_inode_update_after_write(trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
- bch2_inode_update_after_write(&trans, inode, &inode_u,
+ bch2_inode_update_after_write(trans, inode, &inode_u,
ATTR_MTIME);
if (inode_u.bi_subvol) {
@@ -479,8 +473,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
set_nlink(&inode->v, 0);
}
err:
- bch2_trans_exit(&trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
+ bch2_trans_put(trans);
return ret;
}
@@ -543,7 +537,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
struct bch_inode_unpacked dst_dir_u, src_dir_u;
struct bch_inode_unpacked src_inode_u, dst_inode_u;
- struct btree_trans trans;
+ struct btree_trans *trans;
enum bch_rename_mode mode = flags & RENAME_EXCHANGE
? BCH_RENAME_EXCHANGE
: dst_dentry->d_inode
@@ -560,7 +554,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
return ret;
}
- bch2_trans_init(&trans, c, 8, 2048);
+ trans = bch2_trans_get(c);
bch2_lock_inodes(INODE_UPDATE_LOCK,
src_dir,
@@ -587,8 +581,8 @@ static int bch2_rename2(struct mnt_idmap *idmap,
goto err;
}
- ret = commit_do(&trans, NULL, NULL, 0,
- bch2_rename_trans(&trans,
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_rename_trans(trans,
inode_inum(src_dir), &src_dir_u,
inode_inum(dst_dir), &dst_dir_u,
&src_inode_u,
@@ -603,21 +597,21 @@ static int bch2_rename2(struct mnt_idmap *idmap,
BUG_ON(dst_inode &&
dst_inode->v.i_ino != dst_inode_u.bi_inum);
- bch2_inode_update_after_write(&trans, src_dir, &src_dir_u,
+ bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
ATTR_MTIME|ATTR_CTIME);
if (src_dir != dst_dir)
- bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u,
+ bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
ATTR_MTIME|ATTR_CTIME);
- bch2_inode_update_after_write(&trans, src_inode, &src_inode_u,
+ bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
ATTR_CTIME);
if (dst_inode)
- bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u,
+ bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
ATTR_CTIME);
err:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
bch2_fs_quota_transfer(c, src_inode,
bch_qid(&src_inode->ei_inode),
@@ -680,7 +674,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_qid qid;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter inode_iter = { NULL };
struct bch_inode_unpacked inode_u;
struct posix_acl *acl = NULL;
@@ -701,13 +695,13 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
if (ret)
goto err;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
kfree(acl);
acl = NULL;
- ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT);
if (ret)
goto btree_err;
@@ -715,29 +709,29 @@ retry:
bch2_setattr_copy(idmap, inode, &inode_u, attr);
if (attr->ia_valid & ATTR_MODE) {
- ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
+ ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
inode_u.bi_mode, &acl);
if (ret)
goto btree_err;
}
- ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
- bch2_trans_commit(&trans, NULL, NULL,
+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
btree_err:
- bch2_trans_iter_exit(&trans, &inode_iter);
+ bch2_trans_iter_exit(trans, &inode_iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
goto err_trans;
- bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid);
+ bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
if (acl)
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
err_trans:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
err:
mutex_unlock(&inode->ei_update_lock);
@@ -798,7 +792,7 @@ static int bch2_setattr(struct mnt_idmap *idmap,
return ret;
return iattr->ia_valid & ATTR_SIZE
- ? bch2_truncate(idmap, inode, iattr)
+ ? bchfs_truncate(idmap, inode, iattr)
: bch2_setattr_nonsize(idmap, inode, iattr);
}
@@ -879,7 +873,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
{
struct bch_fs *c = vinode->i_sb->s_fs_info;
struct bch_inode_info *ei = to_bch_ei(vinode);
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_buf cur, prev;
@@ -900,18 +894,18 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_bkey_buf_init(&cur);
bch2_bkey_buf_init(&prev);
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
if (ret)
goto err;
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(ei->v.i_ino, start, snapshot), 0);
- while (!(ret = btree_trans_too_many_iters(&trans)) &&
+ while (!(ret = btree_trans_too_many_iters(trans)) &&
(k = bch2_btree_iter_peek_upto(&iter, end)).k &&
!(ret = bkey_err(k))) {
enum btree_id data_btree = BTREE_ID_extents;
@@ -928,7 +922,7 @@ retry:
bch2_bkey_buf_reassemble(&cur, c, k);
- ret = bch2_read_indirect_extent(&trans, &data_btree,
+ ret = bch2_read_indirect_extent(trans, &data_btree,
&offset_into_extent, &cur);
if (ret)
break;
@@ -947,7 +941,7 @@ retry:
cur.k->k.p.offset += cur.k->k.size;
if (have_extent) {
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
ret = bch2_fill_extent(c, info,
bkey_i_to_s_c(prev.k), 0);
if (ret)
@@ -961,18 +955,18 @@ retry:
POS(iter.pos.inode, iter.pos.offset + sectors));
}
start = iter.pos.offset;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret && have_extent) {
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
FIEMAP_EXTENT_LAST);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
bch2_bkey_buf_exit(&cur, c);
bch2_bkey_buf_exit(&prev, c);
return ret < 0 ? ret : 0;
@@ -1230,7 +1224,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
struct bch_inode_info *inode = to_bch_ei(child->d_inode);
struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter1;
struct btree_iter iter2;
struct bkey_s_c k;
@@ -1245,23 +1239,23 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
if (!S_ISDIR(dir->v.i_mode))
return -EINVAL;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
- bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents,
+ bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
POS(dir->ei_inode.bi_inum, 0), 0);
- bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents,
+ bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
POS(dir->ei_inode.bi_inum, 0), 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&iter1, snapshot);
bch2_btree_iter_set_snapshot(&iter2, snapshot);
- ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u);
+ ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
if (ret)
goto err;
@@ -1279,7 +1273,7 @@ retry:
}
d = bkey_s_c_to_dirent(k);
- ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
+ ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
if (ret > 0)
ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
if (ret)
@@ -1301,7 +1295,7 @@ retry:
continue;
d = bkey_s_c_to_dirent(k);
- ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
+ ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
if (ret < 0)
break;
if (ret)
@@ -1325,9 +1319,9 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_iter_exit(&trans, &iter1);
- bch2_trans_iter_exit(&trans, &iter2);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter1);
+ bch2_trans_iter_exit(trans, &iter2);
+ bch2_trans_put(trans);
return ret;
}
@@ -1661,7 +1655,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
up_write(&c->state_lock);
}
- if (opts.errors >= 0)
+ if (opt_defined(opts, errors))
c->opts.errors = opts.errors;
err:
return bch2_err_class(ret);
@@ -1722,6 +1716,35 @@ static void bch2_put_super(struct super_block *sb)
__bch2_fs_stop(c);
}
+/*
+ * bcachefs doesn't currently integrate intwrite freeze protection but the
+ * internal write references serve the same purpose. Therefore reuse the
+ * read-only transition code to perform the quiesce. The caveat is that we don't
+ * currently have the ability to block tasks that want a write reference while
+ * the superblock is frozen. This is fine for now, but we should either add
+ * blocking support or find a way to integrate sb_start_intwrite() and friends.
+ */
+static int bch2_freeze(struct super_block *sb)
+{
+ struct bch_fs *c = sb->s_fs_info;
+
+ down_write(&c->state_lock);
+ bch2_fs_read_only(c);
+ up_write(&c->state_lock);
+ return 0;
+}
+
+static int bch2_unfreeze(struct super_block *sb)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ int ret;
+
+ down_write(&c->state_lock);
+ ret = bch2_fs_read_write(c);
+ up_write(&c->state_lock);
+ return ret;
+}
+
static const struct super_operations bch_super_operations = {
.alloc_inode = bch2_alloc_inode,
.destroy_inode = bch2_destroy_inode,
@@ -1733,10 +1756,8 @@ static const struct super_operations bch_super_operations = {
.show_options = bch2_show_options,
.remount_fs = bch2_remount,
.put_super = bch2_put_super,
-#if 0
.freeze_fs = bch2_freeze,
.unfreeze_fs = bch2_unfreeze,
-#endif
};
static int bch2_set_super(struct super_block *s, void *data)
@@ -1890,7 +1911,7 @@ got_sb:
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
if (ret) {
- bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "mounting: error getting root inode");
goto err_put_super;
}
diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h
index 10e1111..5edf1d4 100644
--- a/libbcachefs/fs.h
+++ b/libbcachefs/fs.h
@@ -197,7 +197,7 @@ int bch2_vfs_init(void);
#else
-#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) do {} while (0)
+#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
snapshot_id_list *s) {}
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 57b3dfa..206302b 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -80,7 +80,7 @@ static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
if (!ret)
*subvol = le32_to_cpu(s.subvol);
else if (bch2_err_matches(ret, ENOENT))
- bch_err(trans->c, "snapshot %u not fonud", snapshot);
+ bch_err(trans->c, "snapshot %u not found", snapshot);
return ret;
}
@@ -127,8 +127,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
ret = bch2_inode_unpack(k, inode);
err:
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(trans->c, "error fetching inode %llu: %s",
- inode_nr, bch2_err_str(ret));
+ bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -154,8 +153,7 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
*snapshot = iter.pos.snapshot;
err:
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(trans->c, "error fetching inode %llu:%u: %s",
- inode_nr, *snapshot, bch2_err_str(ret));
+ bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -206,17 +204,16 @@ static int __write_inode(struct btree_trans *trans,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
}
-static int write_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *inode,
- u32 snapshot)
+static int fsck_write_inode(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ u32 snapshot)
{
int ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
__write_inode(trans, inode, snapshot));
if (ret)
- bch_err(trans->c, "error in fsck: error updating inode: %s",
- bch2_err_str(ret));
+ bch_err_fn(trans->c, ret);
return ret;
}
@@ -278,13 +275,13 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
}
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
if (ret)
return ret;
if (d_type != DT_DIR) {
bch_err(c, "error looking up lost+found: not a directory");
- return ret;
+ return -BCH_ERR_ENOENT_not_directory;
}
/*
@@ -301,7 +298,7 @@ create_lostfound:
0, 0, S_IFDIR|0700, 0, NULL, NULL,
(subvol_inum) { }, 0);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(c, "error creating lost+found: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "creating lost+found");
return ret;
}
@@ -365,8 +362,7 @@ static int reattach_inode(struct btree_trans *trans,
BTREE_INSERT_NOFAIL,
__reattach_inode(trans, inode, inode_snapshot));
if (ret) {
- bch_err(trans->c, "error reattaching inode %llu: %s",
- inode->bi_inum, bch2_err_str(ret));
+ bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum);
return ret;
}
@@ -475,7 +471,12 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
* key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
* and @ancestor hasn't been overwritten in @seen
*
- * That is, returns whether key in @ancestor snapshot is visible in @id snapshot
+ * @c: filesystem handle
+ * @seen: list of snapshot ids already seen at current position
+ * @id: descendent snapshot id
+ * @ancestor: ancestor snapshot id
+ *
+ * Returns: whether key in @ancestor snapshot is visible in @id snapshot
*/
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
u32 id, u32 ancestor)
@@ -520,14 +521,16 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
* snapshot id @dst, test whether there is some snapshot in which @dst is
* visible.
*
- * This assumes we're visiting @src keys in natural key order.
+ * @c: filesystem handle
+ * @s: list of snapshot IDs already seen at @src
+ * @src: snapshot ID of src key
+ * @dst: snapshot ID of dst key
+ * Returns: true if there is some snapshot in which @dst is visible
*
- * @s - list of snapshot IDs already seen at @src
- * @src - snapshot ID of src key
- * @dst - snapshot ID of dst key
+ * Assumes we're visiting @src keys in natural key order
*/
-static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
- u32 src, u32 dst)
+static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
+ u32 src, u32 dst)
{
return dst <= src
? key_visible_in_snapshot(c, s, dst, src)
@@ -618,10 +621,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
w->first_this_inode = true;
- if (trans_was_restarted(trans, restart_count))
- return -BCH_ERR_transaction_restart_nested;
-
- return 0;
+ return trans_was_restarted(trans, restart_count);
}
static struct inode_walker_entry *
@@ -822,7 +822,7 @@ bad_hash:
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(c, "hash_redo_key err %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
if (ret)
return ret;
ret = -BCH_ERR_transaction_restart_nested;
@@ -886,7 +886,8 @@ static int check_inode(struct btree_trans *trans,
ret = __write_inode(trans, &u, iter->pos.snapshot);
if (ret) {
- bch_err_msg(c, ret, "in fsck: error updating inode");
+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err_msg(c, ret, "in fsck updating inode");
return ret;
}
@@ -904,8 +905,7 @@ static int check_inode(struct btree_trans *trans,
ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(c, "error in fsck: error while deleting inode: %s",
- bch2_err_str(ret));
+ bch_err_msg(c, ret, "in fsck deleting inode");
return ret;
}
@@ -928,8 +928,7 @@ static int check_inode(struct btree_trans *trans,
POS(u.bi_inum, U64_MAX),
0, NULL);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(c, "error in fsck: error truncating inode: %s",
- bch2_err_str(ret));
+ bch_err_msg(c, ret, "in fsck truncating inode");
if (ret)
return ret;
@@ -954,8 +953,7 @@ static int check_inode(struct btree_trans *trans,
sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
if (sectors < 0) {
- bch_err(c, "error in fsck: error recounting inode sectors: %s",
- bch2_err_str(sectors));
+ bch_err_msg(c, sectors, "fsck recounting inode sectors");
return sectors;
}
@@ -974,13 +972,13 @@ static int check_inode(struct btree_trans *trans,
if (do_update) {
ret = __write_inode(trans, &u, iter->pos.snapshot);
if (ret) {
- bch_err_msg(c, ret, "in fsck: error updating inode");
+ bch_err_msg(c, ret, "in fsck updating inode");
return ret;
}
}
err:
fsck_err:
- if (ret)
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err_fn(c, ret);
return ret;
}
@@ -989,7 +987,7 @@ noinline_for_stack
int bch2_check_inodes(struct bch_fs *c)
{
bool full = c->opts.fsck;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bch_inode_unpacked prev = { 0 };
struct snapshots_seen s;
@@ -997,16 +995,15 @@ int bch2_check_inodes(struct bch_fs *c)
int ret;
snapshots_seen_init(&s);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_inode(&trans, &iter, k, &prev, &s, full));
+ check_inode(trans, &iter, k, &prev, &s, full));
- bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -1081,7 +1078,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
w->last_pos.inode, i->snapshot,
i->inode.bi_sectors, i->count)) {
i->inode.bi_sectors = i->count;
- ret = write_inode(trans, &i->inode, i->snapshot);
+ ret = fsck_write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
}
@@ -1089,9 +1086,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
fsck_err:
if (ret)
bch_err_fn(c, ret);
- if (!ret && trans_was_restarted(trans, restart_count))
- ret = -BCH_ERR_transaction_restart_nested;
- return ret;
+ return ret ?: trans_was_restarted(trans, restart_count);
}
struct extent_end {
@@ -1441,7 +1436,7 @@ int bch2_check_extents(struct bch_fs *c)
{
struct inode_walker w = inode_walker_init();
struct snapshots_seen s;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct extent_ends extent_ends;
@@ -1450,23 +1445,22 @@ int bch2_check_extents(struct bch_fs *c)
snapshots_seen_init(&s);
extent_ends_init(&extent_ends);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
&res, NULL,
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
bch2_disk_reservation_put(c, &res);
- check_extent(&trans, &iter, k, &w, &s, &extent_ends);
+ check_extent(trans, &iter, k, &w, &s, &extent_ends);
})) ?:
- check_i_sectors(&trans, &w);
+ check_i_sectors(trans, &w);
bch2_disk_reservation_put(c, &res);
extent_ends_exit(&extent_ends);
inode_walker_exit(&w);
- bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@@ -1501,7 +1495,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
i->inode.bi_nlink = i->count;
- ret = write_inode(trans, &i->inode, i->snapshot);
+ ret = fsck_write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
}
@@ -1509,9 +1503,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
fsck_err:
if (ret)
bch_err_fn(c, ret);
- if (!ret && trans_was_restarted(trans, restart_count))
- ret = -BCH_ERR_transaction_restart_nested;
- return ret;
+ return ret ?: trans_was_restarted(trans, restart_count);
}
static int check_dirent_target(struct btree_trans *trans,
@@ -1809,23 +1801,22 @@ int bch2_check_dirents(struct bch_fs *c)
struct inode_walker target = inode_walker_init();
struct snapshots_seen s;
struct bch_hash_info hash_info;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
snapshots_seen_init(&s);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_dirents,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
k,
NULL, NULL,
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_dirent(&trans, &iter, k, &hash_info, &dir, &target, &s));
+ check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s));
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
snapshots_seen_exit(&s);
inode_walker_exit(&dir);
inode_walker_exit(&target);
@@ -1879,23 +1870,18 @@ int bch2_check_xattrs(struct bch_fs *c)
{
struct inode_walker inode = inode_walker_init();
struct bch_hash_info hash_info;
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
k,
NULL, NULL,
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_xattr(&trans, &iter, k, &hash_info, &inode));
-
- bch2_trans_exit(&trans);
-
+ check_xattr(trans, &iter, k, &hash_info, &inode)));
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -1927,10 +1913,10 @@ static int check_root_trans(struct btree_trans *trans)
ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
- __bch2_btree_insert(trans, BTREE_ID_subvolumes,
+ bch2_btree_insert_trans(trans, BTREE_ID_subvolumes,
&root_subvol.k_i, 0));
if (ret) {
- bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "writing root subvol");
goto err;
}
@@ -1949,7 +1935,7 @@ static int check_root_trans(struct btree_trans *trans)
ret = __write_inode(trans, &root_inode, snapshot);
if (ret)
- bch_err(c, "error writing root inode: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "writing root inode");
}
err:
fsck_err:
@@ -1964,7 +1950,7 @@ int bch2_check_root(struct bch_fs *c)
ret = bch2_trans_do(c, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
- check_root_trans(&trans));
+ check_root_trans(trans));
if (ret)
bch_err_fn(c, ret);
@@ -2116,16 +2102,14 @@ fsck_err:
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_inode_unpacked u;
pathbuf path = { 0, };
int ret;
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
- for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
@@ -2142,12 +2126,12 @@ int bch2_check_directory_structure(struct bch_fs *c)
if (u.bi_flags & BCH_INODE_UNLINKED)
continue;
- ret = check_path(&trans, &path, &u, iter.pos.snapshot);
+ ret = check_path(trans, &path, &u, iter.pos.snapshot);
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
darray_exit(&path);
if (ret)
@@ -2155,8 +2139,6 @@ int bch2_check_directory_structure(struct bch_fs *c)
return ret;
}
-/* check_nlink pass: */
-
struct nlink_table {
size_t nr;
size_t size;
@@ -2238,15 +2220,13 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
struct nlink_table *t,
u64 start, u64 *end)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_inode_unpacked u;
int ret = 0;
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
- for_each_btree_key(&trans, iter, BTREE_ID_inodes,
+ for_each_btree_key(trans, iter, BTREE_ID_inodes,
POS(0, start),
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
@@ -2275,8 +2255,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
}
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
if (ret)
bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
@@ -2288,7 +2268,7 @@ noinline_for_stack
static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
u64 range_start, u64 range_end)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct snapshots_seen s;
struct btree_iter iter;
struct bkey_s_c k;
@@ -2297,9 +2277,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
snapshots_seen_init(&s);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
- for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
@@ -2319,12 +2297,12 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
break;
}
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (ret)
bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
snapshots_seen_exit(&s);
return ret;
}
@@ -2375,22 +2353,17 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
struct nlink_table *links,
u64 range_start, u64 range_end)
{
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
size_t idx = 0;
int ret = 0;
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
- POS(0, range_start),
- BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
- NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end));
-
- bch2_trans_exit(&trans);
-
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
+ POS(0, range_start),
+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+ check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)));
if (ret < 0) {
bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
return ret;
@@ -2472,13 +2445,12 @@ int bch2_fix_reflink_p(struct bch_fs *c)
return 0;
ret = bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter,
+ for_each_btree_key_commit(trans, iter,
BTREE_ID_extents, POS_MIN,
BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
- fix_reflink_p_key(&trans, &iter, k)));
-
+ fix_reflink_p_key(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 8114b6e..8bfd99c 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -120,8 +120,7 @@ static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked;
- int ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i),
- &unpacked);
+ ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked);
BUG_ON(ret);
BUG_ON(unpacked.bi_inum != inode->bi_inum);
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
@@ -318,7 +317,7 @@ int bch2_inode_unpack(struct bkey_s_c k,
return bch2_inode_unpack_slowpath(k, unpacked);
}
-int bch2_inode_peek(struct btree_trans *trans,
+static int bch2_inode_peek_nowarn(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
subvol_inum inum, unsigned flags)
@@ -349,7 +348,17 @@ int bch2_inode_peek(struct btree_trans *trans,
return 0;
err:
bch2_trans_iter_exit(trans, iter);
- if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+}
+
+int bch2_inode_peek(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ subvol_inum inum, unsigned flags)
+{
+ int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
+
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);
return ret;
}
@@ -817,7 +826,7 @@ err:
int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_i_inode_generation delete;
struct bch_inode_unpacked inode_u;
@@ -825,8 +834,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
u32 snapshot;
int ret;
- bch2_trans_init(&trans, c, 0, 1024);
-
/*
* If this was a directory, there shouldn't be any real dirents left -
* but there could be whiteouts (from hash collisions) that we should
@@ -835,19 +842,19 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
* XXX: the dirent could ideally would delete whiteouts when they're no
* longer needed
*/
- ret = bch2_inode_delete_keys(&trans, inum, BTREE_ID_extents) ?:
- bch2_inode_delete_keys(&trans, inum, BTREE_ID_xattrs) ?:
- bch2_inode_delete_keys(&trans, inum, BTREE_ID_dirents);
+ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
+ bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
+ bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
if (ret)
goto err;
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
- k = bch2_bkey_get_iter(&trans, &iter, BTREE_ID_inodes,
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, inum.inum, snapshot),
BTREE_ITER_INTENT|BTREE_ITER_CACHED);
ret = bkey_err(k);
@@ -855,7 +862,7 @@ retry:
goto err;
if (!bkey_is_inode(k.k)) {
- bch2_fs_inconsistent(trans.c,
+ bch2_fs_inconsistent(c,
"inode %llu:%u not found when deleting",
inum.inum, snapshot);
ret = -EIO;
@@ -868,15 +875,28 @@ retry:
delete.k.p = iter.pos;
delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
- ret = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?:
- bch2_trans_commit(&trans, NULL, NULL,
+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
err:
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
+ subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
+ if (!ret)
+ bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -897,7 +917,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
struct bch_inode_unpacked *inode)
{
return bch2_trans_do(c, NULL, NULL, 0,
- bch2_inode_find_by_inum_trans(&trans, inum, inode));
+ bch2_inode_find_by_inum_trans(trans, inum, inode));
}
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
@@ -1069,14 +1089,12 @@ delete:
int bch2_delete_dead_inodes(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- ret = bch2_btree_write_buffer_flush_sync(&trans);
+ ret = bch2_btree_write_buffer_flush_sync(trans);
if (ret)
goto err;
@@ -1086,26 +1104,26 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
* but we can't retry because the btree write buffer won't have been
* flushed and we'd spin:
*/
- for_each_btree_key(&trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- ret = lockrestart_do(&trans, may_delete_deleted_inode(&trans, k.k->p));
+ ret = lockrestart_do(trans, may_delete_deleted_inode(trans, k.k->p));
if (ret < 0)
break;
if (ret) {
if (!test_bit(BCH_FS_RW, &c->flags)) {
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
bch2_fs_lazy_rw(c);
}
- ret = bch2_inode_rm_snapshot(&trans, k.k->p.offset, k.k->p.snapshot);
+ ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
break;
}
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 22b2440..a7464e1 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -118,6 +118,9 @@ int bch2_inode_create(struct btree_trans *, struct btree_iter *,
int bch2_inode_rm(struct bch_fs *, subvol_inum);
+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *,
+ subvol_inum,
+ struct bch_inode_unpacked *);
int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
deleted file mode 100644
index 831e3f1..0000000
--- a/libbcachefs/io.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_IO_H
-#define _BCACHEFS_IO_H
-
-#include "checksum.h"
-#include "bkey_buf.h"
-#include "io_types.h"
-
-#define to_wbio(_bio) \
- container_of((_bio), struct bch_write_bio, bio)
-
-#define to_rbio(_bio) \
- container_of((_bio), struct bch_read_bio, bio)
-
-void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
-void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
-
-#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-void bch2_latency_acct(struct bch_dev *, u64, int);
-#else
-static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
-#endif
-
-void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
- enum bch_data_type, const struct bkey_i *, bool);
-
-#define BLK_STS_REMOVED ((__force blk_status_t)128)
-
-const char *bch2_blk_status_to_str(blk_status_t);
-
-#define BCH_WRITE_FLAGS() \
- x(ALLOC_NOWAIT) \
- x(CACHED) \
- x(DATA_ENCODED) \
- x(PAGES_STABLE) \
- x(PAGES_OWNED) \
- x(ONLY_SPECIFIED_DEVS) \
- x(WROTE_DATA_INLINE) \
- x(FROM_INTERNAL) \
- x(CHECK_ENOSPC) \
- x(SYNC) \
- x(MOVE) \
- x(IN_WORKER) \
- x(DONE) \
- x(IO_ERROR) \
- x(CONVERT_UNWRITTEN)
-
-enum __bch_write_flags {
-#define x(f) __BCH_WRITE_##f,
- BCH_WRITE_FLAGS()
-#undef x
-};
-
-enum bch_write_flags {
-#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
- BCH_WRITE_FLAGS()
-#undef x
-};
-
-static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
-{
- return op->watermark == BCH_WATERMARK_copygc
- ? op->c->copygc_wq
- : op->c->btree_update_wq;
-}
-
-int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
- struct bkey_i *, bool *, s64 *, s64 *);
-int bch2_extent_update(struct btree_trans *, subvol_inum,
- struct btree_iter *, struct bkey_i *,
- struct disk_reservation *, u64, s64 *, bool);
-int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
- unsigned, struct bch_io_opts, s64 *,
- struct write_point_specifier);
-
-int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
- subvol_inum, u64, s64 *);
-int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
-
-static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
- struct bch_io_opts opts)
-{
- op->c = c;
- op->end_io = NULL;
- op->flags = 0;
- op->written = 0;
- op->error = 0;
- op->csum_type = bch2_data_checksum_type(c, opts);
- op->compression_opt = opts.compression;
- op->nr_replicas = 0;
- op->nr_replicas_required = c->opts.data_replicas_required;
- op->watermark = BCH_WATERMARK_normal;
- op->incompressible = 0;
- op->open_buckets.nr = 0;
- op->devs_have.nr = 0;
- op->target = 0;
- op->opts = opts;
- op->subvol = 0;
- op->pos = POS_MAX;
- op->version = ZERO_VERSION;
- op->write_point = (struct write_point_specifier) { 0 };
- op->res = (struct disk_reservation) { 0 };
- op->new_i_size = U64_MAX;
- op->i_sectors_delta = 0;
- op->devs_need_flush = NULL;
-}
-
-void bch2_write(struct closure *);
-
-void bch2_write_point_do_index_updates(struct work_struct *);
-
-static inline struct bch_write_bio *wbio_init(struct bio *bio)
-{
- struct bch_write_bio *wbio = to_wbio(bio);
-
- memset(&wbio->wbio, 0, sizeof(wbio->wbio));
- return wbio;
-}
-
-void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
-
-struct bch_devs_mask;
-struct cache_promote_op;
-struct extent_ptr_decoded;
-
-int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
- struct bkey_buf *);
-
-static inline int bch2_read_indirect_extent(struct btree_trans *trans,
- enum btree_id *data_btree,
- unsigned *offset_into_extent,
- struct bkey_buf *k)
-{
- if (k->k->k.type != KEY_TYPE_reflink_p)
- return 0;
-
- *data_btree = BTREE_ID_reflink;
- return __bch2_read_indirect_extent(trans, offset_into_extent, k);
-}
-
-enum bch_read_flags {
- BCH_READ_RETRY_IF_STALE = 1 << 0,
- BCH_READ_MAY_PROMOTE = 1 << 1,
- BCH_READ_USER_MAPPED = 1 << 2,
- BCH_READ_NODECODE = 1 << 3,
- BCH_READ_LAST_FRAGMENT = 1 << 4,
-
- /* internal: */
- BCH_READ_MUST_BOUNCE = 1 << 5,
- BCH_READ_MUST_CLONE = 1 << 6,
- BCH_READ_IN_RETRY = 1 << 7,
-};
-
-int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
- struct bvec_iter, struct bpos, enum btree_id,
- struct bkey_s_c, unsigned,
- struct bch_io_failures *, unsigned);
-
-static inline void bch2_read_extent(struct btree_trans *trans,
- struct bch_read_bio *rbio, struct bpos read_pos,
- enum btree_id data_btree, struct bkey_s_c k,
- unsigned offset_into_extent, unsigned flags)
-{
- __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
- data_btree, k, offset_into_extent, NULL, flags);
-}
-
-void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
- subvol_inum, struct bch_io_failures *, unsigned flags);
-
-static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
- subvol_inum inum)
-{
- struct bch_io_failures failed = { .nr = 0 };
-
- BUG_ON(rbio->_state);
-
- rbio->c = c;
- rbio->start_time = local_clock();
- rbio->subvol = inum.subvol;
-
- __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
- BCH_READ_RETRY_IF_STALE|
- BCH_READ_MAY_PROMOTE|
- BCH_READ_USER_MAPPED);
-}
-
-static inline struct bch_read_bio *rbio_init(struct bio *bio,
- struct bch_io_opts opts)
-{
- struct bch_read_bio *rbio = to_rbio(bio);
-
- rbio->_state = 0;
- rbio->promote = NULL;
- rbio->opts = opts;
- return rbio;
-}
-
-void bch2_fs_io_exit(struct bch_fs *);
-int bch2_fs_io_init(struct bch_fs *);
-
-#endif /* _BCACHEFS_IO_H */
diff --git a/libbcachefs/io_misc.c b/libbcachefs/io_misc.c
new file mode 100644
index 0000000..32432bd
--- /dev/null
+++ b/libbcachefs/io_misc.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * io_misc.c - fallocate, fpunch, truncate:
+ */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "clock.h"
+#include "error.h"
+#include "extents.h"
+#include "extent_update.h"
+#include "inode.h"
+#include "io_misc.h"
+#include "io_write.h"
+#include "logged_ops.h"
+#include "subvolume.h"
+
+/* Overwrites whatever was present with zeroes: */
+int bch2_extent_fallocate(struct btree_trans *trans,
+ subvol_inum inum,
+ struct btree_iter *iter,
+ unsigned sectors,
+ struct bch_io_opts opts,
+ s64 *i_sectors_delta,
+ struct write_point_specifier write_point)
+{
+ struct bch_fs *c = trans->c;
+ struct disk_reservation disk_res = { 0 };
+ struct closure cl;
+ struct open_buckets open_buckets = { 0 };
+ struct bkey_s_c k;
+ struct bkey_buf old, new;
+ unsigned sectors_allocated = 0;
+ bool have_reservation = false;
+ bool unwritten = opts.nocow &&
+ c->sb.version >= bcachefs_metadata_version_unwritten_extents;
+ int ret;
+
+ bch2_bkey_buf_init(&old);
+ bch2_bkey_buf_init(&new);
+ closure_init_stack(&cl);
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
+
+ if (!have_reservation) {
+ unsigned new_replicas =
+ max(0, (int) opts.data_replicas -
+ (int) bch2_bkey_nr_ptrs_fully_allocated(k));
+ /*
+ * Get a disk reservation before (in the nocow case) calling
+ * into the allocator:
+ */
+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
+ if (unlikely(ret))
+ goto err;
+
+ bch2_bkey_buf_reassemble(&old, c, k);
+ }
+
+ if (have_reservation) {
+ if (!bch2_extents_match(k, bkey_i_to_s_c(old.k)))
+ goto err;
+
+ bch2_key_resize(&new.k->k, sectors);
+ } else if (!unwritten) {
+ struct bkey_i_reservation *reservation;
+
+ bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
+ reservation = bkey_reservation_init(new.k);
+ reservation->k.p = iter->pos;
+ bch2_key_resize(&reservation->k, sectors);
+ reservation->v.nr_replicas = opts.data_replicas;
+ } else {
+ struct bkey_i_extent *e;
+ struct bch_devs_list devs_have;
+ struct write_point *wp;
+ struct bch_extent_ptr *ptr;
+
+ devs_have.nr = 0;
+
+ bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
+
+ e = bkey_extent_init(new.k);
+ e->k.p = iter->pos;
+
+ ret = bch2_alloc_sectors_start_trans(trans,
+ opts.foreground_target,
+ false,
+ write_point,
+ &devs_have,
+ opts.data_replicas,
+ opts.data_replicas,
+ BCH_WATERMARK_normal, 0, &cl, &wp);
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ ret = -BCH_ERR_transaction_restart_nested;
+ if (ret)
+ goto err;
+
+ sectors = min(sectors, wp->sectors_free);
+ sectors_allocated = sectors;
+
+ bch2_key_resize(&e->k, sectors);
+
+ bch2_open_bucket_get(c, wp, &open_buckets);
+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
+ bch2_alloc_sectors_done(c, wp);
+
+ extent_for_each_ptr(extent_i_to_s(e), ptr)
+ ptr->unwritten = true;
+ }
+
+ have_reservation = true;
+
+ ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
+ 0, i_sectors_delta, true);
+err:
+ if (!ret && sectors_allocated)
+ bch2_increment_clock(c, sectors_allocated, WRITE);
+
+ bch2_open_buckets_put(c, &open_buckets);
+ bch2_disk_reservation_put(c, &disk_res);
+ bch2_bkey_buf_exit(&new, c);
+ bch2_bkey_buf_exit(&old, c);
+
+ if (closure_nr_remaining(&cl) != 1) {
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ }
+
+ return ret;
+}
+
+/*
+ * Returns -BCH_ERR_transacton_restart if we had to drop locks:
+ */
+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
+ subvol_inum inum, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
+ struct bpos end_pos = POS(inum.inum, end);
+ struct bkey_s_c k;
+ int ret = 0, ret2 = 0;
+ u32 snapshot;
+
+ while (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(c, 0);
+ struct bkey_i delete;
+
+ if (ret)
+ ret2 = ret;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ continue;
+
+ bch2_btree_iter_set_snapshot(iter, snapshot);
+
+ /*
+ * peek_upto() doesn't have ideal semantics for extents:
+ */
+ k = bch2_btree_iter_peek_upto(iter, end_pos);
+ if (!k.k)
+ break;
+
+ ret = bkey_err(k);
+ if (ret)
+ continue;
+
+ bkey_init(&delete.k);
+ delete.k.p = iter->pos;
+
+ /* create the biggest key we can */
+ bch2_key_resize(&delete.k, max_sectors);
+ bch2_cut_back(end_pos, &delete);
+
+ ret = bch2_extent_update(trans, inum, iter, &delete,
+ &disk_res, 0, i_sectors_delta, false);
+ bch2_disk_reservation_put(c, &disk_res);
+ }
+
+ return ret ?: ret2;
+}
+
+int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ POS(inum.inum, start),
+ BTREE_ITER_INTENT);
+
+ ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+
+ return ret;
+}
+
+/* truncate: */
+
+void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k);
+
+ prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
+ prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
+ prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size));
+}
+
+static int truncate_set_isize(struct btree_trans *trans,
+ subvol_inum inum,
+ u64 new_i_size)
+{
+ struct btree_iter iter = { NULL };
+ struct bch_inode_unpacked inode_u;
+ int ret;
+
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?:
+ (inode_u.bi_size = new_i_size, 0) ?:
+ bch2_inode_write(trans, &iter, &inode_u);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
+ struct bkey_i *op_k,
+ u64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter fpunch_iter;
+ struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k);
+ subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
+ u64 new_i_size = le64_to_cpu(op->v.new_i_size);
+ int ret;
+
+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ truncate_set_isize(trans, inum, new_i_size));
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents,
+ POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9),
+ BTREE_ITER_INTENT);
+ ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta);
+ bch2_trans_iter_exit(trans, &fpunch_iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+err:
+ bch2_logged_op_finish(trans, op_k);
+ return ret;
+}
+
+int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k)
+{
+ return __bch2_resume_logged_op_truncate(trans, op_k, NULL);
+}
+
+int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta)
+{
+ struct bkey_i_logged_op_truncate op;
+
+ bkey_logged_op_truncate_init(&op.k_i);
+ op.v.subvol = cpu_to_le32(inum.subvol);
+ op.v.inum = cpu_to_le64(inum.inum);
+ op.v.new_i_size = cpu_to_le64(new_i_size);
+
+ return bch2_trans_run(c,
+ bch2_logged_op_start(trans, &op.k_i) ?:
+ __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));
+}
+
+/* finsert/fcollapse: */
+
+void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k);
+
+ prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
+ prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
+ prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset));
+ prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset));
+}
+
+static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
+{
+ struct btree_iter iter;
+ struct bch_inode_unpacked inode_u;
+ int ret;
+
+ offset <<= 9;
+ len <<= 9;
+
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
+ if (ret)
+ return ret;
+
+ if (len > 0) {
+ if (MAX_LFS_FILESIZE - inode_u.bi_size < len) {
+ ret = -EFBIG;
+ goto err;
+ }
+
+ if (offset >= inode_u.bi_size) {
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ inode_u.bi_size += len;
+ inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c);
+
+ ret = bch2_inode_write(trans, &iter, &inode_u);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
+ struct bkey_i *op_k,
+ u64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
+ subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
+ u64 dst_offset = le64_to_cpu(op->v.dst_offset);
+ u64 src_offset = le64_to_cpu(op->v.src_offset);
+ s64 shift = dst_offset - src_offset;
+ u64 len = abs(shift);
+ u64 pos = le64_to_cpu(op->v.pos);
+ bool insert = shift > 0;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ POS(inum.inum, 0),
+ BTREE_ITER_INTENT);
+
+ switch (op->v.state) {
+case LOGGED_OP_FINSERT_start:
+ op->v.state = LOGGED_OP_FINSERT_shift_extents;
+
+ if (insert) {
+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ adjust_i_size(trans, inum, src_offset, len) ?:
+ bch2_logged_op_update(trans, &op->k_i));
+ if (ret)
+ goto err;
+ } else {
+ bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset));
+
+ ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto err;
+
+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ bch2_logged_op_update(trans, &op->k_i));
+ }
+
+ fallthrough;
+case LOGGED_OP_FINSERT_shift_extents:
+ while (1) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(c, 0);
+ struct bkey_i delete, *copy;
+ struct bkey_s_c k;
+ struct bpos src_pos = POS(inum.inum, src_offset);
+ u32 snapshot;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto btree_err;
+
+ bch2_btree_iter_set_snapshot(&iter, snapshot);
+ bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot));
+
+ k = insert
+ ? bch2_btree_iter_peek_prev(&iter)
+ : bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
+ if ((ret = bkey_err(k)))
+ goto btree_err;
+
+ if (!k.k ||
+ k.k->p.inode != inum.inum ||
+ bkey_le(k.k->p, POS(inum.inum, src_offset)))
+ break;
+
+ copy = bch2_bkey_make_mut_noupdate(trans, k);
+ if ((ret = PTR_ERR_OR_ZERO(copy)))
+ goto btree_err;
+
+ if (insert &&
+ bkey_lt(bkey_start_pos(k.k), src_pos)) {
+ bch2_cut_front(src_pos, copy);
+
+ /* Splitting compressed extent? */
+ bch2_disk_reservation_add(c, &disk_res,
+ copy->k.size *
+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)),
+ BCH_DISK_RESERVATION_NOFAIL);
+ }
+
+ bkey_init(&delete.k);
+ delete.k.p = copy->k.p;
+ delete.k.p.snapshot = snapshot;
+ delete.k.size = copy->k.size;
+
+ copy->k.p.offset += shift;
+ copy->k.p.snapshot = snapshot;
+
+ op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
+
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
+ bch2_logged_op_update(trans, &op->k_i) ?:
+ bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL);
+btree_err:
+ bch2_disk_reservation_put(c, &disk_res);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ goto err;
+
+ pos = le64_to_cpu(op->v.pos);
+ }
+
+ op->v.state = LOGGED_OP_FINSERT_finish;
+
+ if (!insert) {
+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ adjust_i_size(trans, inum, src_offset, shift) ?:
+ bch2_logged_op_update(trans, &op->k_i));
+ } else {
+ /* We need an inode update to update bi_journal_seq for fsync: */
+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ adjust_i_size(trans, inum, 0, 0) ?:
+ bch2_logged_op_update(trans, &op->k_i));
+ }
+
+ break;
+case LOGGED_OP_FINSERT_finish:
+ break;
+ }
+err:
+ bch2_logged_op_finish(trans, op_k);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k)
+{
+ return __bch2_resume_logged_op_finsert(trans, op_k, NULL);
+}
+
+int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
+ u64 offset, u64 len, bool insert,
+ s64 *i_sectors_delta)
+{
+ struct bkey_i_logged_op_finsert op;
+ s64 shift = insert ? len : -len;
+
+ bkey_logged_op_finsert_init(&op.k_i);
+ op.v.subvol = cpu_to_le32(inum.subvol);
+ op.v.inum = cpu_to_le64(inum.inum);
+ op.v.dst_offset = cpu_to_le64(offset + shift);
+ op.v.src_offset = cpu_to_le64(offset);
+ op.v.pos = cpu_to_le64(insert ? U64_MAX : offset);
+
+ return bch2_trans_run(c,
+ bch2_logged_op_start(trans, &op.k_i) ?:
+ __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
+}
diff --git a/libbcachefs/io_misc.h b/libbcachefs/io_misc.h
new file mode 100644
index 0000000..c9e6ed4
--- /dev/null
+++ b/libbcachefs/io_misc.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_MISC_H
+#define _BCACHEFS_IO_MISC_H
+
+int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
+ unsigned, struct bch_io_opts, s64 *,
+ struct write_point_specifier);
+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
+ subvol_inum, u64, s64 *);
+int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
+
+void bch2_logged_op_truncate_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_logged_op_truncate ((struct bkey_ops) { \
+ .val_to_text = bch2_logged_op_truncate_to_text, \
+ .min_val_size = 24, \
+})
+
+int bch2_resume_logged_op_truncate(struct btree_trans *, struct bkey_i *);
+
+int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *);
+
+void bch2_logged_op_finsert_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_logged_op_finsert ((struct bkey_ops) { \
+ .val_to_text = bch2_logged_op_finsert_to_text, \
+ .min_val_size = 24, \
+})
+
+int bch2_resume_logged_op_finsert(struct btree_trans *, struct bkey_i *);
+
+int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *);
+
+#endif /* _BCACHEFS_IO_MISC_H */
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
new file mode 100644
index 0000000..443c3ea
--- /dev/null
+++ b/libbcachefs/io_read.c
@@ -0,0 +1,1210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Some low level IO code, and hacks for various block layer limitations
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "clock.h"
+#include "compress.h"
+#include "data_update.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "io_read.h"
+#include "io_misc.h"
+#include "io_write.h"
+#include "subvolume.h"
+#include "trace.h"
+
+#include <linux/sched/mm.h>
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+ const struct bch_devs_mask *devs;
+ unsigned d, nr = 0, total = 0;
+ u64 now = local_clock(), last;
+ s64 congested;
+ struct bch_dev *ca;
+
+ if (!target)
+ return false;
+
+ rcu_read_lock();
+ devs = bch2_target_to_mask(c, target) ?:
+ &c->rw_devs[BCH_DATA_user];
+
+ for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
+ ca = rcu_dereference(c->devs[d]);
+ if (!ca)
+ continue;
+
+ congested = atomic_read(&ca->congested);
+ last = READ_ONCE(ca->congested_last);
+ if (time_after64(now, last))
+ congested -= (now - last) >> 12;
+
+ total += max(congested, 0LL);
+ nr++;
+ }
+ rcu_read_unlock();
+
+ return bch2_rand_range(nr * CONGESTED_MAX) < total;
+}
+
+#else
+
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+ return false;
+}
+
+#endif
+
+/* Cache promotion on read */
+
+struct promote_op {
+ struct rcu_head rcu;
+ u64 start_time;
+
+ struct rhash_head hash;
+ struct bpos pos;
+
+ struct data_update write;
+ struct bio_vec bi_inline_vecs[0]; /* must be last */
+};
+
+static const struct rhashtable_params bch_promote_params = {
+ .head_offset = offsetof(struct promote_op, hash),
+ .key_offset = offsetof(struct promote_op, pos),
+ .key_len = sizeof(struct bpos),
+};
+
+static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
+ struct bpos pos,
+ struct bch_io_opts opts,
+ unsigned flags)
+{
+ BUG_ON(!opts.promote_target);
+
+ if (!(flags & BCH_READ_MAY_PROMOTE))
+ return -BCH_ERR_nopromote_may_not;
+
+ if (bch2_bkey_has_target(c, k, opts.promote_target))
+ return -BCH_ERR_nopromote_already_promoted;
+
+ if (bkey_extent_is_unwritten(k))
+ return -BCH_ERR_nopromote_unwritten;
+
+ if (bch2_target_congested(c, opts.promote_target))
+ return -BCH_ERR_nopromote_congested;
+
+ if (rhashtable_lookup_fast(&c->promote_table, &pos,
+ bch_promote_params))
+ return -BCH_ERR_nopromote_in_flight;
+
+ return 0;
+}
+
+static void promote_free(struct bch_fs *c, struct promote_op *op)
+{
+ int ret;
+
+ bch2_data_update_exit(&op->write);
+
+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params);
+ BUG_ON(ret);
+ bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ kfree_rcu(op, rcu);
+}
+
+static void promote_done(struct bch_write_op *wop)
+{
+ struct promote_op *op =
+ container_of(wop, struct promote_op, write.op);
+ struct bch_fs *c = op->write.op.c;
+
+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
+ op->start_time);
+ promote_free(c, op);
+}
+
+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
+{
+ struct bio *bio = &op->write.op.wbio.bio;
+
+ trace_and_count(op->write.op.c, read_promote, &rbio->bio);
+
+ /* we now own pages: */
+ BUG_ON(!rbio->bounce);
+ BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
+
+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
+ swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
+
+ bch2_data_update_read_done(&op->write, rbio->pick.crc);
+}
+
+static struct promote_op *__promote_alloc(struct btree_trans *trans,
+ enum btree_id btree_id,
+ struct bkey_s_c k,
+ struct bpos pos,
+ struct extent_ptr_decoded *pick,
+ struct bch_io_opts opts,
+ unsigned sectors,
+ struct bch_read_bio **rbio)
+{
+ struct bch_fs *c = trans->c;
+ struct promote_op *op = NULL;
+ struct bio *bio;
+ unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
+ int ret;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
+ return NULL;
+
+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS);
+ if (!op)
+ goto err;
+
+ op->start_time = local_clock();
+ op->pos = pos;
+
+ /*
+ * We don't use the mempool here because extents that aren't
+ * checksummed or compressed can be too big for the mempool:
+ */
+ *rbio = kzalloc(sizeof(struct bch_read_bio) +
+ sizeof(struct bio_vec) * pages,
+ GFP_NOFS);
+ if (!*rbio)
+ goto err;
+
+ rbio_init(&(*rbio)->bio, opts);
+ bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
+
+ if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
+ GFP_NOFS))
+ goto err;
+
+ (*rbio)->bounce = true;
+ (*rbio)->split = true;
+ (*rbio)->kmalloc = true;
+
+ if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
+ bch_promote_params))
+ goto err;
+
+ bio = &op->write.op.wbio.bio;
+ bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
+
+ ret = bch2_data_update_init(trans, NULL, &op->write,
+ writepoint_hashed((unsigned long) current),
+ opts,
+ (struct data_update_opts) {
+ .target = opts.promote_target,
+ .extra_replicas = 1,
+ .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
+ },
+ btree_id, k);
+ /*
+ * possible errors: -BCH_ERR_nocow_lock_blocked,
+ * -BCH_ERR_ENOSPC_disk_reservation:
+ */
+ if (ret) {
+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params);
+ BUG_ON(ret);
+ goto err;
+ }
+
+ op->write.op.end_io = promote_done;
+
+ return op;
+err:
+ if (*rbio)
+ bio_free_pages(&(*rbio)->bio);
+ kfree(*rbio);
+ *rbio = NULL;
+ kfree(op);
+ bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ return NULL;
+}
+
+noinline
+static struct promote_op *promote_alloc(struct btree_trans *trans,
+ struct bvec_iter iter,
+ struct bkey_s_c k,
+ struct extent_ptr_decoded *pick,
+ struct bch_io_opts opts,
+ unsigned flags,
+ struct bch_read_bio **rbio,
+ bool *bounce,
+ bool *read_full)
+{
+ struct bch_fs *c = trans->c;
+ bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+ /* data might have to be decompressed in the write path: */
+ unsigned sectors = promote_full
+ ? max(pick->crc.compressed_size, pick->crc.live_size)
+ : bvec_iter_sectors(iter);
+ struct bpos pos = promote_full
+ ? bkey_start_pos(k.k)
+ : POS(k.k->p.inode, iter.bi_sector);
+ struct promote_op *promote;
+ int ret;
+
+ ret = should_promote(c, k, pos, opts, flags);
+ if (ret)
+ goto nopromote;
+
+ promote = __promote_alloc(trans,
+ k.k->type == KEY_TYPE_reflink_v
+ ? BTREE_ID_reflink
+ : BTREE_ID_extents,
+ k, pos, pick, opts, sectors, rbio);
+ if (!promote) {
+ ret = -BCH_ERR_nopromote_enomem;
+ goto nopromote;
+ }
+
+ *bounce = true;
+ *read_full = promote_full;
+ return promote;
+nopromote:
+ trace_read_nopromote(c, ret);
+ return NULL;
+}
+
+/* Read */
+
+#define READ_RETRY_AVOID 1
+#define READ_RETRY 2
+#define READ_ERR 3
+
+enum rbio_context {
+ RBIO_CONTEXT_NULL,
+ RBIO_CONTEXT_HIGHPRI,
+ RBIO_CONTEXT_UNBOUND,
+};
+
+static inline struct bch_read_bio *
+bch2_rbio_parent(struct bch_read_bio *rbio)
+{
+ return rbio->split ? rbio->parent : rbio;
+}
+
+__always_inline
+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
+ enum rbio_context context,
+ struct workqueue_struct *wq)
+{
+ if (context <= rbio->context) {
+ fn(&rbio->work);
+ } else {
+ rbio->work.func = fn;
+ rbio->context = context;
+ queue_work(wq, &rbio->work);
+ }
+}
+
+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
+{
+ BUG_ON(rbio->bounce && !rbio->split);
+
+ if (rbio->promote)
+ promote_free(rbio->c, rbio->promote);
+ rbio->promote = NULL;
+
+ if (rbio->bounce)
+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
+
+ if (rbio->split) {
+ struct bch_read_bio *parent = rbio->parent;
+
+ if (rbio->kmalloc)
+ kfree(rbio);
+ else
+ bio_put(&rbio->bio);
+
+ rbio = parent;
+ }
+
+ return rbio;
+}
+
+/*
+ * Only called on a top level bch_read_bio to complete an entire read request,
+ * not a split:
+ */
+static void bch2_rbio_done(struct bch_read_bio *rbio)
+{
+ if (rbio->start_time)
+ bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
+ rbio->start_time);
+ bio_endio(&rbio->bio);
+}
+
+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
+ struct bvec_iter bvec_iter,
+ struct bch_io_failures *failed,
+ unsigned flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_buf sk;
+ struct bkey_s_c k;
+ int ret;
+
+ flags &= ~BCH_READ_LAST_FRAGMENT;
+ flags |= BCH_READ_MUST_CLONE;
+
+ bch2_bkey_buf_init(&sk);
+
+ bch2_trans_iter_init(trans, &iter, rbio->data_btree,
+ rbio->read_pos, BTREE_ITER_SLOTS);
+retry:
+ rbio->bio.bi_status = 0;
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ if (bkey_err(k))
+ goto err;
+
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ k = bkey_i_to_s_c(sk.k);
+ bch2_trans_unlock(trans);
+
+ if (!bch2_bkey_matches_ptr(c, k,
+ rbio->pick.ptr,
+ rbio->data_pos.offset -
+ rbio->pick.crc.offset)) {
+ /* extent we wanted to read no longer exists: */
+ rbio->hole = true;
+ goto out;
+ }
+
+ ret = __bch2_read_extent(trans, rbio, bvec_iter,
+ rbio->read_pos,
+ rbio->data_btree,
+ k, 0, failed, flags);
+ if (ret == READ_RETRY)
+ goto retry;
+ if (ret)
+ goto err;
+out:
+ bch2_rbio_done(rbio);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&sk, c);
+ return;
+err:
+ rbio->bio.bi_status = BLK_STS_IOERR;
+ goto out;
+}
+
+static void bch2_rbio_retry(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
+ struct bvec_iter iter = rbio->bvec_iter;
+ unsigned flags = rbio->flags;
+ subvol_inum inum = {
+ .subvol = rbio->subvol,
+ .inum = rbio->read_pos.inode,
+ };
+ struct bch_io_failures failed = { .nr = 0 };
+
+ trace_and_count(c, read_retry, &rbio->bio);
+
+ if (rbio->retry == READ_RETRY_AVOID)
+ bch2_mark_io_failure(&failed, &rbio->pick);
+
+ rbio->bio.bi_status = 0;
+
+ rbio = bch2_rbio_free(rbio);
+
+ flags |= BCH_READ_IN_RETRY;
+ flags &= ~BCH_READ_MAY_PROMOTE;
+
+ if (flags & BCH_READ_NODECODE) {
+ bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
+ } else {
+ flags &= ~BCH_READ_LAST_FRAGMENT;
+ flags |= BCH_READ_MUST_CLONE;
+
+ __bch2_read(c, rbio, iter, inum, &failed, flags);
+ }
+}
+
+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
+ blk_status_t error)
+{
+ rbio->retry = retry;
+
+ if (rbio->flags & BCH_READ_IN_RETRY)
+ return;
+
+ if (retry == READ_ERR) {
+ rbio = bch2_rbio_free(rbio);
+
+ rbio->bio.bi_status = error;
+ bch2_rbio_done(rbio);
+ } else {
+ bch2_rbio_punt(rbio, bch2_rbio_retry,
+ RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ }
+}
+
+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
+ struct bch_read_bio *rbio)
+{
+ struct bch_fs *c = rbio->c;
+ u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
+ struct bch_extent_crc_unpacked new_crc;
+ struct btree_iter iter;
+ struct bkey_i *new;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ if (crc_is_compressed(rbio->pick.crc))
+ return 0;
+
+ k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ if ((ret = bkey_err(k)))
+ goto out;
+
+ if (bversion_cmp(k.k->version, rbio->version) ||
+ !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
+ goto out;
+
+ /* Extent was merged? */
+ if (bkey_start_offset(k.k) < data_offset ||
+ k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
+ goto out;
+
+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
+ rbio->pick.crc, NULL, &new_crc,
+ bkey_start_offset(k.k) - data_offset, k.k->size,
+ rbio->pick.crc.csum_type)) {
+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * going to be temporarily appending another checksum entry:
+ */
+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
+ sizeof(struct bch_extent_crc128));
+ if ((ret = PTR_ERR_OR_ZERO(new)))
+ goto out;
+
+ bkey_reassemble(new, k);
+
+ if (!bch2_bkey_narrow_crcs(new, new_crc))
+ goto out;
+
+ ret = bch2_trans_update(trans, &iter, new,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
+{
+ bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL,
+ __bch2_rbio_narrow_crcs(trans, rbio));
+}
+
+/* Inner part that may run in process context */
+static void __bch2_read_endio(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
+ struct bio *src = &rbio->bio;
+ struct bio *dst = &bch2_rbio_parent(rbio)->bio;
+ struct bvec_iter dst_iter = rbio->bvec_iter;
+ struct bch_extent_crc_unpacked crc = rbio->pick.crc;
+ struct nonce nonce = extent_nonce(rbio->version, crc);
+ unsigned nofs_flags;
+ struct bch_csum csum;
+ int ret;
+
+ nofs_flags = memalloc_nofs_save();
+
+ /* Reset iterator for checksumming and copying bounced data: */
+ if (rbio->bounce) {
+ src->bi_iter.bi_size = crc.compressed_size << 9;
+ src->bi_iter.bi_idx = 0;
+ src->bi_iter.bi_bvec_done = 0;
+ } else {
+ src->bi_iter = rbio->bvec_iter;
+ }
+
+ csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
+ if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
+ goto csum_err;
+
+ /*
+ * XXX
+ * We need to rework the narrow_crcs path to deliver the read completion
+ * first, and then punt to a different workqueue, otherwise we're
+ * holding up reads while doing btree updates which is bad for memory
+ * reclaim.
+ */
+ if (unlikely(rbio->narrow_crcs))
+ bch2_rbio_narrow_crcs(rbio);
+
+ if (rbio->flags & BCH_READ_NODECODE)
+ goto nodecode;
+
+ /* Adjust crc to point to subset of data we want: */
+ crc.offset += rbio->offset_into_extent;
+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+
+ if (crc_is_compressed(crc)) {
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
+
+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
+ !c->opts.no_data_io)
+ goto decompression_err;
+ } else {
+ /* don't need to decrypt the entire bio: */
+ nonce = nonce_add(nonce, crc.offset << 9);
+ bio_advance(src, crc.offset << 9);
+
+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+ src->bi_iter.bi_size = dst_iter.bi_size;
+
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
+
+ if (rbio->bounce) {
+ struct bvec_iter src_iter = src->bi_iter;
+
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+ }
+ }
+
+ if (rbio->promote) {
+ /*
+ * Re encrypt data we decrypted, so it's consistent with
+ * rbio->crc:
+ */
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
+
+ promote_start(rbio->promote, rbio);
+ rbio->promote = NULL;
+ }
+nodecode:
+ if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
+ rbio = bch2_rbio_free(rbio);
+ bch2_rbio_done(rbio);
+ }
+out:
+ memalloc_nofs_restore(nofs_flags);
+ return;
+csum_err:
+ /*
+ * Checksum error: if the bio wasn't bounced, we may have been
+ * reading into buffers owned by userspace (that userspace can
+ * scribble over) - retry the read, bouncing it this time:
+ */
+ if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
+ rbio->flags |= BCH_READ_MUST_BOUNCE;
+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
+ goto out;
+ }
+
+ bch_err_inum_offset_ratelimited(ca,
+ rbio->read_pos.inode,
+ rbio->read_pos.offset << 9,
+ "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
+ rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
+ csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
+ bch2_io_error(ca);
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
+decompression_err:
+ bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
+ rbio->read_pos.offset << 9,
+ "decompression error");
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ goto out;
+decrypt_err:
+ bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
+ rbio->read_pos.offset << 9,
+ "decrypt error");
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ goto out;
+}
+
+static void bch2_read_endio(struct bio *bio)
+{
+ struct bch_read_bio *rbio =
+ container_of(bio, struct bch_read_bio, bio);
+ struct bch_fs *c = rbio->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
+ struct workqueue_struct *wq = NULL;
+ enum rbio_context context = RBIO_CONTEXT_NULL;
+
+ if (rbio->have_ioref) {
+ bch2_latency_acct(ca, rbio->submit_time, READ);
+ percpu_ref_put(&ca->io_ref);
+ }
+
+ if (!rbio->split)
+ rbio->bio.bi_end_io = rbio->end_io;
+
+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
+ rbio->read_pos.inode,
+ rbio->read_pos.offset,
+ "data read error: %s",
+ bch2_blk_status_to_str(bio->bi_status))) {
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
+ return;
+ }
+
+ if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+ ptr_stale(ca, &rbio->pick.ptr)) {
+ trace_and_count(c, read_reuse_race, &rbio->bio);
+
+ if (rbio->flags & BCH_READ_RETRY_IF_STALE)
+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
+ else
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
+ return;
+ }
+
+ if (rbio->narrow_crcs ||
+ rbio->promote ||
+ crc_is_compressed(rbio->pick.crc) ||
+ bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
+ context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
+ else if (rbio->pick.crc.csum_type)
+ context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;
+
+ bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
+}
+
+int __bch2_read_indirect_extent(struct btree_trans *trans,
+ unsigned *offset_into_extent,
+ struct bkey_buf *orig_k)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u64 reflink_offset;
+ int ret;
+
+ reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
+ *offset_into_extent;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
+ POS(0, reflink_offset), 0);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_reflink_v &&
+ k.k->type != KEY_TYPE_indirect_inline_data) {
+ bch_err_inum_offset_ratelimited(trans->c,
+ orig_k->k->k.p.inode,
+ orig_k->k->k.p.offset << 9,
+ "%llu len %u points to nonexistent indirect extent %llu",
+ orig_k->k->k.p.offset,
+ orig_k->k->k.size,
+ reflink_offset);
+ bch2_inconsistent_error(trans->c);
+ ret = -EIO;
+ goto err;
+ }
+
+ *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
+ bch2_bkey_buf_reassemble(orig_k, trans->c, k);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bch_extent_ptr ptr)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+ struct btree_iter iter;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ PTR_BUCKET_POS(c, &ptr),
+ BTREE_ITER_CACHED);
+
+ prt_printf(&buf, "Attempting to read from stale dirty pointer:");
+ printbuf_indent_add(&buf, 2);
+ prt_newline(&buf);
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+
+ ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ if (!ret) {
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, c, k);
+ }
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+}
+
+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
+ struct bvec_iter iter, struct bpos read_pos,
+ enum btree_id data_btree, struct bkey_s_c k,
+ unsigned offset_into_extent,
+ struct bch_io_failures *failed, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct extent_ptr_decoded pick;
+ struct bch_read_bio *rbio = NULL;
+ struct bch_dev *ca = NULL;
+ struct promote_op *promote = NULL;
+ bool bounce = false, read_full = false, narrow_crcs = false;
+ struct bpos data_pos = bkey_start_pos(k.k);
+ int pick_ret;
+
+ if (bkey_extent_is_inline_data(k.k)) {
+ unsigned bytes = min_t(unsigned, iter.bi_size,
+ bkey_inline_data_bytes(k.k));
+
+ swap(iter.bi_size, bytes);
+ memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
+ swap(iter.bi_size, bytes);
+ bio_advance_iter(&orig->bio, &iter, bytes);
+ zero_fill_bio_iter(&orig->bio, iter);
+ goto out_read_done;
+ }
+retry_pick:
+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
+
+ /* hole or reservation - just zero fill: */
+ if (!pick_ret)
+ goto hole;
+
+ if (pick_ret < 0) {
+ bch_err_inum_offset_ratelimited(c,
+ read_pos.inode, read_pos.offset << 9,
+ "no device to read from");
+ goto err;
+ }
+
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+ /*
+ * Stale dirty pointers are treated as IO errors, but @failed isn't
+ * allocated unless we're in the retry path - so if we're not in the
+ * retry path, don't check here, it'll be caught in bch2_read_endio()
+ * and we'll end up in the retry path:
+ */
+ if ((flags & BCH_READ_IN_RETRY) &&
+ !pick.ptr.cached &&
+ unlikely(ptr_stale(ca, &pick.ptr))) {
+ read_from_stale_dirty_pointer(trans, k, pick.ptr);
+ bch2_mark_io_failure(failed, &pick);
+ goto retry_pick;
+ }
+
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ bch2_trans_unlock(trans);
+
+ if (flags & BCH_READ_NODECODE) {
+ /*
+ * can happen if we retry, and the extent we were going to read
+ * has been merged in the meantime:
+ */
+ if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
+ goto hole;
+
+ iter.bi_size = pick.crc.compressed_size << 9;
+ goto get_bio;
+ }
+
+ if (!(flags & BCH_READ_LAST_FRAGMENT) ||
+ bio_flagged(&orig->bio, BIO_CHAIN))
+ flags |= BCH_READ_MUST_CLONE;
+
+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
+ bch2_can_narrow_extent_crcs(k, pick.crc);
+
+ if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
+ flags |= BCH_READ_MUST_BOUNCE;
+
+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+ if (crc_is_compressed(pick.crc) ||
+ (pick.crc.csum_type != BCH_CSUM_none &&
+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+ (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+ (flags & BCH_READ_USER_MAPPED)) ||
+ (flags & BCH_READ_MUST_BOUNCE)))) {
+ read_full = true;
+ bounce = true;
+ }
+
+ if (orig->opts.promote_target)
+ promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
+ &rbio, &bounce, &read_full);
+
+ if (!read_full) {
+ EBUG_ON(crc_is_compressed(pick.crc));
+ EBUG_ON(pick.crc.csum_type &&
+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+ bvec_iter_sectors(iter) != pick.crc.live_size ||
+ pick.crc.offset ||
+ offset_into_extent));
+
+ data_pos.offset += offset_into_extent;
+ pick.ptr.offset += pick.crc.offset +
+ offset_into_extent;
+ offset_into_extent = 0;
+ pick.crc.compressed_size = bvec_iter_sectors(iter);
+ pick.crc.uncompressed_size = bvec_iter_sectors(iter);
+ pick.crc.offset = 0;
+ pick.crc.live_size = bvec_iter_sectors(iter);
+ }
+get_bio:
+ if (rbio) {
+ /*
+ * promote already allocated bounce rbio:
+ * promote needs to allocate a bio big enough for uncompressing
+ * data in the write path, but we're not going to use it all
+ * here:
+ */
+ EBUG_ON(rbio->bio.bi_iter.bi_size <
+ pick.crc.compressed_size << 9);
+ rbio->bio.bi_iter.bi_size =
+ pick.crc.compressed_size << 9;
+ } else if (bounce) {
+ unsigned sectors = pick.crc.compressed_size;
+
+ rbio = rbio_init(bio_alloc_bioset(NULL,
+ DIV_ROUND_UP(sectors, PAGE_SECTORS),
+ 0,
+ GFP_NOFS,
+ &c->bio_read_split),
+ orig->opts);
+
+ bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
+ rbio->bounce = true;
+ rbio->split = true;
+ } else if (flags & BCH_READ_MUST_CLONE) {
+ /*
+ * Have to clone if there were any splits, due to error
+ * reporting issues (if a split errored, and retrying didn't
+ * work, when it reports the error to its parent (us) we don't
+ * know if the error was from our bio, and we should retry, or
+ * from the whole bio, in which case we don't want to retry and
+ * lose the error)
+ */
+ rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
+ &c->bio_read_split),
+ orig->opts);
+ rbio->bio.bi_iter = iter;
+ rbio->split = true;
+ } else {
+ rbio = orig;
+ rbio->bio.bi_iter = iter;
+ EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
+ }
+
+ EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
+
+ rbio->c = c;
+ rbio->submit_time = local_clock();
+ if (rbio->split)
+ rbio->parent = orig;
+ else
+ rbio->end_io = orig->bio.bi_end_io;
+ rbio->bvec_iter = iter;
+ rbio->offset_into_extent= offset_into_extent;
+ rbio->flags = flags;
+ rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
+ rbio->narrow_crcs = narrow_crcs;
+ rbio->hole = 0;
+ rbio->retry = 0;
+ rbio->context = 0;
+ /* XXX: only initialize this if needed */
+ rbio->devs_have = bch2_bkey_devs(k);
+ rbio->pick = pick;
+ rbio->subvol = orig->subvol;
+ rbio->read_pos = read_pos;
+ rbio->data_btree = data_btree;
+ rbio->data_pos = data_pos;
+ rbio->version = k.k->version;
+ rbio->promote = promote;
+ INIT_WORK(&rbio->work, NULL);
+
+ rbio->bio.bi_opf = orig->bio.bi_opf;
+ rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
+ rbio->bio.bi_end_io = bch2_read_endio;
+
+ if (rbio->bounce)
+ trace_and_count(c, read_bounce, &rbio->bio);
+
+ this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
+ bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
+
+ /*
+ * If it's being moved internally, we don't want to flag it as a cache
+ * hit:
+ */
+ if (pick.ptr.cached && !(flags & BCH_READ_NODECODE))
+ bch2_bucket_io_time_reset(trans, pick.ptr.dev,
+ PTR_BUCKET_NR(ca, &pick.ptr), READ);
+
+ if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
+ bio_inc_remaining(&orig->bio);
+ trace_and_count(c, read_split, &orig->bio);
+ }
+
+ if (!rbio->pick.idx) {
+ if (!rbio->have_ioref) {
+ bch_err_inum_offset_ratelimited(c,
+ read_pos.inode,
+ read_pos.offset << 9,
+ "no device to read from");
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
+ }
+
+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
+ bio_sectors(&rbio->bio));
+ bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
+
+ if (unlikely(c->opts.no_data_io)) {
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ bio_endio(&rbio->bio);
+ } else {
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ submit_bio(&rbio->bio);
+ else
+ submit_bio_wait(&rbio->bio);
+ }
+
+ /*
+ * We just submitted IO which may block, we expect relock fail
+ * events and shouldn't count them:
+ */
+ trans->notrace_relock_fail = true;
+ } else {
+ /* Attempting reconstruct read: */
+ if (bch2_ec_read_extent(c, rbio)) {
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
+ }
+
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ bio_endio(&rbio->bio);
+ }
+out:
+ if (likely(!(flags & BCH_READ_IN_RETRY))) {
+ return 0;
+ } else {
+ int ret;
+
+ rbio->context = RBIO_CONTEXT_UNBOUND;
+ bch2_read_endio(&rbio->bio);
+
+ ret = rbio->retry;
+ rbio = bch2_rbio_free(rbio);
+
+ if (ret == READ_RETRY_AVOID) {
+ bch2_mark_io_failure(failed, &pick);
+ ret = READ_RETRY;
+ }
+
+ if (!ret)
+ goto out_read_done;
+
+ return ret;
+ }
+
+err:
+ if (flags & BCH_READ_IN_RETRY)
+ return READ_ERR;
+
+ orig->bio.bi_status = BLK_STS_IOERR;
+ goto out_read_done;
+
+hole:
+ /*
+ * won't normally happen in the BCH_READ_NODECODE
+ * (bch2_move_extent()) path, but if we retry and the extent we wanted
+ * to read no longer exists we have to signal that:
+ */
+ if (flags & BCH_READ_NODECODE)
+ orig->hole = true;
+
+ zero_fill_bio_iter(&orig->bio, iter);
+out_read_done:
+ if (flags & BCH_READ_LAST_FRAGMENT)
+ bch2_rbio_done(orig);
+ return 0;
+}
+
+void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
+ struct bvec_iter bvec_iter, subvol_inum inum,
+ struct bch_io_failures *failed, unsigned flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_buf sk;
+ struct bkey_s_c k;
+ u32 snapshot;
+ int ret;
+
+ BUG_ON(flags & BCH_READ_NODECODE);
+
+ bch2_bkey_buf_init(&sk);
+retry:
+ bch2_trans_begin(trans);
+ iter = (struct btree_iter) { NULL };
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
+ BTREE_ITER_SLOTS);
+ while (1) {
+ unsigned bytes, sectors, offset_into_extent;
+ enum btree_id data_btree = BTREE_ID_extents;
+
+ /*
+ * read_extent -> io_time_reset may cause a transaction restart
+ * without returning an error, we need to check for that here:
+ */
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ break;
+
+ bch2_btree_iter_set_pos(&iter,
+ POS(inum.inum, bvec_iter.bi_sector));
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ break;
+
+ offset_into_extent = iter.pos.offset -
+ bkey_start_offset(k.k);
+ sectors = k.k->size - offset_into_extent;
+
+ bch2_bkey_buf_reassemble(&sk, c, k);
+
+ ret = bch2_read_indirect_extent(trans, &data_btree,
+ &offset_into_extent, &sk);
+ if (ret)
+ break;
+
+ k = bkey_i_to_s_c(sk.k);
+
+ /*
+ * With indirect extents, the amount of data to read is the min
+ * of the original extent and the indirect extent:
+ */
+ sectors = min(sectors, k.k->size - offset_into_extent);
+
+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
+ swap(bvec_iter.bi_size, bytes);
+
+ if (bvec_iter.bi_size == bytes)
+ flags |= BCH_READ_LAST_FRAGMENT;
+
+ ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
+ data_btree, k,
+ offset_into_extent, failed, flags);
+ if (ret)
+ break;
+
+ if (flags & BCH_READ_LAST_FRAGMENT)
+ break;
+
+ swap(bvec_iter.bi_size, bytes);
+ bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
+
+ ret = btree_trans_too_many_iters(trans);
+ if (ret)
+ break;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ ret == READ_RETRY ||
+ ret == READ_RETRY_AVOID)
+ goto retry;
+
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&sk, c);
+
+ if (ret) {
+ bch_err_inum_offset_ratelimited(c, inum.inum,
+ bvec_iter.bi_sector << 9,
+ "read error %i from btree lookup", ret);
+ rbio->bio.bi_status = BLK_STS_IOERR;
+ bch2_rbio_done(rbio);
+ }
+}
+
+void bch2_fs_io_read_exit(struct bch_fs *c)
+{
+ if (c->promote_table.tbl)
+ rhashtable_destroy(&c->promote_table);
+ bioset_exit(&c->bio_read_split);
+ bioset_exit(&c->bio_read);
+}
+
+int bch2_fs_io_read_init(struct bch_fs *c)
+{
+ if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_read_init;
+
+ if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_read_split_init;
+
+ if (rhashtable_init(&c->promote_table, &bch_promote_params))
+ return -BCH_ERR_ENOMEM_promote_table_init;
+
+ return 0;
+}
diff --git a/libbcachefs/io_read.h b/libbcachefs/io_read.h
new file mode 100644
index 0000000..d9c18bb
--- /dev/null
+++ b/libbcachefs/io_read.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_READ_H
+#define _BCACHEFS_IO_READ_H
+
+#include "bkey_buf.h"
+
+struct bch_read_bio {
+ struct bch_fs *c;
+ u64 start_time;
+ u64 submit_time;
+
+ /*
+ * Reads will often have to be split, and if the extent being read from
+ * was checksummed or compressed we'll also have to allocate bounce
+ * buffers and copy the data back into the original bio.
+ *
+ * If we didn't have to split, we have to save and restore the original
+ * bi_end_io - @split below indicates which:
+ */
+ union {
+ struct bch_read_bio *parent;
+ bio_end_io_t *end_io;
+ };
+
+ /*
+ * Saved copy of bio->bi_iter, from submission time - allows us to
+ * resubmit on IO error, and also to copy data back to the original bio
+ * when we're bouncing:
+ */
+ struct bvec_iter bvec_iter;
+
+ unsigned offset_into_extent;
+
+ u16 flags;
+ union {
+ struct {
+ u16 bounce:1,
+ split:1,
+ kmalloc:1,
+ have_ioref:1,
+ narrow_crcs:1,
+ hole:1,
+ retry:2,
+ context:2;
+ };
+ u16 _state;
+ };
+
+ struct bch_devs_list devs_have;
+
+ struct extent_ptr_decoded pick;
+
+ /*
+ * pos we read from - different from data_pos for indirect extents:
+ */
+ u32 subvol;
+ struct bpos read_pos;
+
+ /*
+ * start pos of data we read (may not be pos of data we want) - for
+ * promote, narrow extents paths:
+ */
+ enum btree_id data_btree;
+ struct bpos data_pos;
+ struct bversion version;
+
+ struct promote_op *promote;
+
+ struct bch_io_opts opts;
+
+ struct work_struct work;
+
+ struct bio bio;
+};
+
+#define to_rbio(_bio) container_of((_bio), struct bch_read_bio, bio)
+
+struct bch_devs_mask;
+struct cache_promote_op;
+struct extent_ptr_decoded;
+
+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
+ struct bkey_buf *);
+
+static inline int bch2_read_indirect_extent(struct btree_trans *trans,
+ enum btree_id *data_btree,
+ unsigned *offset_into_extent,
+ struct bkey_buf *k)
+{
+ if (k->k->k.type != KEY_TYPE_reflink_p)
+ return 0;
+
+ *data_btree = BTREE_ID_reflink;
+ return __bch2_read_indirect_extent(trans, offset_into_extent, k);
+}
+
+enum bch_read_flags {
+ BCH_READ_RETRY_IF_STALE = 1 << 0,
+ BCH_READ_MAY_PROMOTE = 1 << 1,
+ BCH_READ_USER_MAPPED = 1 << 2,
+ BCH_READ_NODECODE = 1 << 3,
+ BCH_READ_LAST_FRAGMENT = 1 << 4,
+
+ /* internal: */
+ BCH_READ_MUST_BOUNCE = 1 << 5,
+ BCH_READ_MUST_CLONE = 1 << 6,
+ BCH_READ_IN_RETRY = 1 << 7,
+};
+
+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
+ struct bvec_iter, struct bpos, enum btree_id,
+ struct bkey_s_c, unsigned,
+ struct bch_io_failures *, unsigned);
+
+static inline void bch2_read_extent(struct btree_trans *trans,
+ struct bch_read_bio *rbio, struct bpos read_pos,
+ enum btree_id data_btree, struct bkey_s_c k,
+ unsigned offset_into_extent, unsigned flags)
+{
+ __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
+ data_btree, k, offset_into_extent, NULL, flags);
+}
+
+void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
+ subvol_inum, struct bch_io_failures *, unsigned flags);
+
+static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
+ subvol_inum inum)
+{
+ struct bch_io_failures failed = { .nr = 0 };
+
+ BUG_ON(rbio->_state);
+
+ rbio->c = c;
+ rbio->start_time = local_clock();
+ rbio->subvol = inum.subvol;
+
+ __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
+ BCH_READ_RETRY_IF_STALE|
+ BCH_READ_MAY_PROMOTE|
+ BCH_READ_USER_MAPPED);
+}
+
+static inline struct bch_read_bio *rbio_init(struct bio *bio,
+ struct bch_io_opts opts)
+{
+ struct bch_read_bio *rbio = to_rbio(bio);
+
+ rbio->_state = 0;
+ rbio->promote = NULL;
+ rbio->opts = opts;
+ return rbio;
+}
+
+void bch2_fs_io_read_exit(struct bch_fs *);
+int bch2_fs_io_read_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_IO_READ_H */
diff --git a/libbcachefs/io.c b/libbcachefs/io_write.c
index 3c614c8..d2a0de8 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io_write.c
@@ -1,29 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Some low level IO code, and hacks for various block layer limitations
- *
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
*/
#include "bcachefs.h"
-#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "bset.h"
#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
-#include "compress.h"
#include "clock.h"
-#include "data_update.h"
+#include "compress.h"
#include "debug.h"
-#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "extent_update.h"
#include "inode.h"
-#include "io.h"
+#include "io_write.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
@@ -39,48 +34,8 @@
#include <linux/random.h>
#include <linux/sched/mm.h>
-const char *bch2_blk_status_to_str(blk_status_t status)
-{
- if (status == BLK_STS_REMOVED)
- return "device removed";
- return blk_status_to_str(status);
-}
-
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-static bool bch2_target_congested(struct bch_fs *c, u16 target)
-{
- const struct bch_devs_mask *devs;
- unsigned d, nr = 0, total = 0;
- u64 now = local_clock(), last;
- s64 congested;
- struct bch_dev *ca;
-
- if (!target)
- return false;
-
- rcu_read_lock();
- devs = bch2_target_to_mask(c, target) ?:
- &c->rw_devs[BCH_DATA_user];
-
- for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
- ca = rcu_dereference(c->devs[d]);
- if (!ca)
- continue;
-
- congested = atomic_read(&ca->congested);
- last = READ_ONCE(ca->congested_last);
- if (time_after64(now, last))
- congested -= (now - last) >> 12;
-
- total += max(congested, 0LL);
- nr++;
- }
- rcu_read_unlock();
-
- return bch2_rand_range(nr * CONGESTED_MAX) < total;
-}
-
static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency,
u64 now, int rw)
{
@@ -136,13 +91,6 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
__bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
}
-#else
-
-static bool bch2_target_congested(struct bch_fs *c, u16 target)
-{
- return false;
-}
-
#endif
/* Allocate, free from mempool: */
@@ -368,213 +316,13 @@ int bch2_extent_update(struct btree_trans *trans,
return 0;
}
-/* Overwrites whatever was present with zeroes: */
-int bch2_extent_fallocate(struct btree_trans *trans,
- subvol_inum inum,
- struct btree_iter *iter,
- unsigned sectors,
- struct bch_io_opts opts,
- s64 *i_sectors_delta,
- struct write_point_specifier write_point)
-{
- struct bch_fs *c = trans->c;
- struct disk_reservation disk_res = { 0 };
- struct closure cl;
- struct open_buckets open_buckets = { 0 };
- struct bkey_s_c k;
- struct bkey_buf old, new;
- unsigned sectors_allocated = 0;
- bool have_reservation = false;
- bool unwritten = opts.nocow &&
- c->sb.version >= bcachefs_metadata_version_unwritten_extents;
- int ret;
-
- bch2_bkey_buf_init(&old);
- bch2_bkey_buf_init(&new);
- closure_init_stack(&cl);
-
- k = bch2_btree_iter_peek_slot(iter);
- ret = bkey_err(k);
- if (ret)
- return ret;
-
- sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
-
- if (!have_reservation) {
- unsigned new_replicas =
- max(0, (int) opts.data_replicas -
- (int) bch2_bkey_nr_ptrs_fully_allocated(k));
- /*
- * Get a disk reservation before (in the nocow case) calling
- * into the allocator:
- */
- ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
- if (unlikely(ret))
- goto err;
-
- bch2_bkey_buf_reassemble(&old, c, k);
- }
-
- if (have_reservation) {
- if (!bch2_extents_match(k, bkey_i_to_s_c(old.k)))
- goto err;
-
- bch2_key_resize(&new.k->k, sectors);
- } else if (!unwritten) {
- struct bkey_i_reservation *reservation;
-
- bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
- reservation = bkey_reservation_init(new.k);
- reservation->k.p = iter->pos;
- bch2_key_resize(&reservation->k, sectors);
- reservation->v.nr_replicas = opts.data_replicas;
- } else {
- struct bkey_i_extent *e;
- struct bch_devs_list devs_have;
- struct write_point *wp;
- struct bch_extent_ptr *ptr;
-
- devs_have.nr = 0;
-
- bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
-
- e = bkey_extent_init(new.k);
- e->k.p = iter->pos;
-
- ret = bch2_alloc_sectors_start_trans(trans,
- opts.foreground_target,
- false,
- write_point,
- &devs_have,
- opts.data_replicas,
- opts.data_replicas,
- BCH_WATERMARK_normal, 0, &cl, &wp);
- if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
- ret = -BCH_ERR_transaction_restart_nested;
- if (ret)
- goto err;
-
- sectors = min(sectors, wp->sectors_free);
- sectors_allocated = sectors;
-
- bch2_key_resize(&e->k, sectors);
-
- bch2_open_bucket_get(c, wp, &open_buckets);
- bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
- bch2_alloc_sectors_done(c, wp);
-
- extent_for_each_ptr(extent_i_to_s(e), ptr)
- ptr->unwritten = true;
- }
-
- have_reservation = true;
-
- ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
- 0, i_sectors_delta, true);
-err:
- if (!ret && sectors_allocated)
- bch2_increment_clock(c, sectors_allocated, WRITE);
-
- bch2_open_buckets_put(c, &open_buckets);
- bch2_disk_reservation_put(c, &disk_res);
- bch2_bkey_buf_exit(&new, c);
- bch2_bkey_buf_exit(&old, c);
-
- if (closure_nr_remaining(&cl) != 1) {
- bch2_trans_unlock(trans);
- closure_sync(&cl);
- }
-
- return ret;
-}
-
-/*
- * Returns -BCH_ERR_transacton_restart if we had to drop locks:
- */
-int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
- subvol_inum inum, u64 end,
- s64 *i_sectors_delta)
-{
- struct bch_fs *c = trans->c;
- unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
- struct bpos end_pos = POS(inum.inum, end);
- struct bkey_s_c k;
- int ret = 0, ret2 = 0;
- u32 snapshot;
-
- while (!ret ||
- bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
- struct disk_reservation disk_res =
- bch2_disk_reservation_init(c, 0);
- struct bkey_i delete;
-
- if (ret)
- ret2 = ret;
-
- bch2_trans_begin(trans);
-
- ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
- if (ret)
- continue;
-
- bch2_btree_iter_set_snapshot(iter, snapshot);
-
- /*
- * peek_upto() doesn't have ideal semantics for extents:
- */
- k = bch2_btree_iter_peek_upto(iter, end_pos);
- if (!k.k)
- break;
-
- ret = bkey_err(k);
- if (ret)
- continue;
-
- bkey_init(&delete.k);
- delete.k.p = iter->pos;
-
- /* create the biggest key we can */
- bch2_key_resize(&delete.k, max_sectors);
- bch2_cut_back(end_pos, &delete);
-
- ret = bch2_extent_update(trans, inum, iter, &delete,
- &disk_res, 0, i_sectors_delta, false);
- bch2_disk_reservation_put(c, &disk_res);
- }
-
- return ret ?: ret2;
-}
-
-int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
- s64 *i_sectors_delta)
-{
- struct btree_trans trans;
- struct btree_iter iter;
- int ret;
-
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
- POS(inum.inum, start),
- BTREE_ITER_INTENT);
-
- ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta);
-
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
-
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- ret = 0;
-
- return ret;
-}
-
static int bch2_write_index_default(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct bkey_buf sk;
struct keylist *keys = &op->insert_keys;
struct bkey_i *k = bch2_keylist_front(keys);
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
subvol_inum inum = {
.subvol = op->subvol,
@@ -585,30 +333,29 @@ static int bch2_write_index_default(struct bch_write_op *op)
BUG_ON(!inum.subvol);
bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
do {
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
k = bch2_keylist_front(keys);
bch2_bkey_buf_copy(&sk, c, k);
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol,
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol,
&sk.k->k.p.snapshot);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
bkey_start_pos(&sk.k->k),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- ret = bch2_extent_update(&trans, inum, &iter, sk.k,
+ ret = bch2_extent_update(trans, inum, &iter, sk.k,
&op->res,
op->new_i_size, &op->i_sectors_delta,
op->flags & BCH_WRITE_CHECK_ENOSPC);
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@@ -621,7 +368,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
bch2_cut_front(iter.pos, k);
} while (!bch2_keylist_empty(keys));
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;
@@ -741,7 +488,8 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
}
/**
- * bch_write_index - after a write, update index to point to new data
+ * __bch2_write_index - after a write, update index to point to new data
+ * @op: bch_write_op to process
*/
static void __bch2_write_index(struct bch_write_op *op)
{
@@ -778,10 +526,10 @@ static void __bch2_write_index(struct bch_write_op *op)
op->written += sectors_start - keylist_sectors(keys);
if (ret && !bch2_err_matches(ret, EROFS)) {
- struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
bch_err_inum_offset_ratelimited(c,
- k->k.p.inode, k->k.p.offset << 9,
+ insert->k.p.inode, insert->k.p.offset << 9,
"write error while doing btree update: %s",
bch2_err_str(ret));
}
@@ -1182,7 +930,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
do {
struct bch_extent_crc_unpacked crc = { 0 };
struct bversion version = op->version;
- size_t dst_len, src_len;
+ size_t dst_len = 0, src_len = 0;
if (page_alloc_failed &&
dst->bi_iter.bi_size < (wp->sectors_free << 9) &&
@@ -1414,27 +1162,25 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_i *orig;
struct bkey_s_c k;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
for_each_keylist_key(&op->insert_keys, orig) {
- ret = for_each_btree_key_upto_commit(&trans, iter, BTREE_ID_extents,
+ ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents,
bkey_start_pos(&orig->k), orig->k.p,
BTREE_ITER_INTENT, k,
NULL, NULL, BTREE_INSERT_NOFAIL, ({
- bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size);
+ bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size);
}));
if (ret && !bch2_err_matches(ret, EROFS)) {
- struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
bch_err_inum_offset_ratelimited(c,
- k->k.p.inode, k->k.p.offset << 9,
+ insert->k.p.inode, insert->k.p.offset << 9,
"write error while doing btree update: %s",
bch2_err_str(ret));
}
@@ -1445,7 +1191,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
}
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
}
static void __bch2_nocow_write_done(struct bch_write_op *op)
@@ -1469,7 +1215,7 @@ static void bch2_nocow_write_done(struct closure *cl)
static void bch2_nocow_write(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_ptrs_c ptrs;
@@ -1486,15 +1232,15 @@ static void bch2_nocow_write(struct bch_write_op *op)
if (op->flags & BCH_WRITE_MOVE)
return;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, op->subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, op->subvol, &snapshot);
if (unlikely(ret))
goto err;
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(op->pos.inode, op->pos.offset, snapshot),
BTREE_ITER_SLOTS);
while (1) {
@@ -1540,7 +1286,7 @@ retry:
/* Unlock before taking nocow locks, doing IO: */
bkey_reassemble(op->insert_keys.top, k);
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
bch2_cut_front(op->pos, op->insert_keys.top);
if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
@@ -1589,7 +1335,7 @@ retry:
bch2_btree_iter_advance(&iter);
}
out:
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
@@ -1604,7 +1350,7 @@ err:
op->flags |= BCH_WRITE_DONE;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
/* fallback to cow write path? */
if (!(op->flags & BCH_WRITE_DONE)) {
@@ -1682,7 +1428,7 @@ again:
* allocations for specific disks may hang arbitrarily long:
*/
ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_alloc_sectors_start_trans(&trans,
+ bch2_alloc_sectors_start_trans(trans,
op->target,
op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
op->write_point,
@@ -1798,7 +1544,8 @@ err:
}
/**
- * bch_write - handle a write to a cache device or flash only volume
+ * bch2_write() - handle a write to a cache device or flash only volume
+ * @cl: &bch_write_op->cl
*
* This is the starting point for any data to end up in a cache device; it could
* be from a normal write, or a writeback write, or a write to a flash only
@@ -1899,1140 +1646,14 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
printbuf_indent_sub(out, 2);
}
-/* Cache promotion on read */
-
-struct promote_op {
- struct rcu_head rcu;
- u64 start_time;
-
- struct rhash_head hash;
- struct bpos pos;
-
- struct data_update write;
- struct bio_vec bi_inline_vecs[0]; /* must be last */
-};
-
-static const struct rhashtable_params bch_promote_params = {
- .head_offset = offsetof(struct promote_op, hash),
- .key_offset = offsetof(struct promote_op, pos),
- .key_len = sizeof(struct bpos),
-};
-
-static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
- struct bpos pos,
- struct bch_io_opts opts,
- unsigned flags)
-{
- if (!(flags & BCH_READ_MAY_PROMOTE))
- return false;
-
- if (!opts.promote_target)
- return false;
-
- if (bch2_bkey_has_target(c, k, opts.promote_target))
- return false;
-
- if (bkey_extent_is_unwritten(k))
- return false;
-
- if (bch2_target_congested(c, opts.promote_target)) {
- /* XXX trace this */
- return false;
- }
-
- if (rhashtable_lookup_fast(&c->promote_table, &pos,
- bch_promote_params))
- return false;
-
- return true;
-}
-
-static void promote_free(struct bch_fs *c, struct promote_op *op)
+void bch2_fs_io_write_exit(struct bch_fs *c)
{
- int ret;
-
- bch2_data_update_exit(&op->write);
-
- ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
- bch_promote_params);
- BUG_ON(ret);
- bch2_write_ref_put(c, BCH_WRITE_REF_promote);
- kfree_rcu(op, rcu);
-}
-
-static void promote_done(struct bch_write_op *wop)
-{
- struct promote_op *op =
- container_of(wop, struct promote_op, write.op);
- struct bch_fs *c = op->write.op.c;
-
- bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
- op->start_time);
- promote_free(c, op);
-}
-
-static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
-{
- struct bio *bio = &op->write.op.wbio.bio;
-
- trace_and_count(op->write.op.c, read_promote, &rbio->bio);
-
- /* we now own pages: */
- BUG_ON(!rbio->bounce);
- BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
-
- memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
- sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
- swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
-
- bch2_data_update_read_done(&op->write, rbio->pick.crc);
-}
-
-static struct promote_op *__promote_alloc(struct btree_trans *trans,
- enum btree_id btree_id,
- struct bkey_s_c k,
- struct bpos pos,
- struct extent_ptr_decoded *pick,
- struct bch_io_opts opts,
- unsigned sectors,
- struct bch_read_bio **rbio)
-{
- struct bch_fs *c = trans->c;
- struct promote_op *op = NULL;
- struct bio *bio;
- unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
- int ret;
-
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
- return NULL;
-
- op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS);
- if (!op)
- goto err;
-
- op->start_time = local_clock();
- op->pos = pos;
-
- /*
- * We don't use the mempool here because extents that aren't
- * checksummed or compressed can be too big for the mempool:
- */
- *rbio = kzalloc(sizeof(struct bch_read_bio) +
- sizeof(struct bio_vec) * pages,
- GFP_NOFS);
- if (!*rbio)
- goto err;
-
- rbio_init(&(*rbio)->bio, opts);
- bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
-
- if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
- GFP_NOFS))
- goto err;
-
- (*rbio)->bounce = true;
- (*rbio)->split = true;
- (*rbio)->kmalloc = true;
-
- if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
- bch_promote_params))
- goto err;
-
- bio = &op->write.op.wbio.bio;
- bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
-
- ret = bch2_data_update_init(trans, NULL, &op->write,
- writepoint_hashed((unsigned long) current),
- opts,
- (struct data_update_opts) {
- .target = opts.promote_target,
- .extra_replicas = 1,
- .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
- },
- btree_id, k);
- /*
- * possible errors: -BCH_ERR_nocow_lock_blocked,
- * -BCH_ERR_ENOSPC_disk_reservation:
- */
- if (ret) {
- ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
- bch_promote_params);
- BUG_ON(ret);
- goto err;
- }
-
- op->write.op.end_io = promote_done;
-
- return op;
-err:
- if (*rbio)
- bio_free_pages(&(*rbio)->bio);
- kfree(*rbio);
- *rbio = NULL;
- kfree(op);
- bch2_write_ref_put(c, BCH_WRITE_REF_promote);
- return NULL;
-}
-
-noinline
-static struct promote_op *promote_alloc(struct btree_trans *trans,
- struct bvec_iter iter,
- struct bkey_s_c k,
- struct extent_ptr_decoded *pick,
- struct bch_io_opts opts,
- unsigned flags,
- struct bch_read_bio **rbio,
- bool *bounce,
- bool *read_full)
-{
- struct bch_fs *c = trans->c;
- bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
- /* data might have to be decompressed in the write path: */
- unsigned sectors = promote_full
- ? max(pick->crc.compressed_size, pick->crc.live_size)
- : bvec_iter_sectors(iter);
- struct bpos pos = promote_full
- ? bkey_start_pos(k.k)
- : POS(k.k->p.inode, iter.bi_sector);
- struct promote_op *promote;
-
- if (!should_promote(c, k, pos, opts, flags))
- return NULL;
-
- promote = __promote_alloc(trans,
- k.k->type == KEY_TYPE_reflink_v
- ? BTREE_ID_reflink
- : BTREE_ID_extents,
- k, pos, pick, opts, sectors, rbio);
- if (!promote)
- return NULL;
-
- *bounce = true;
- *read_full = promote_full;
- return promote;
-}
-
-/* Read */
-
-#define READ_RETRY_AVOID 1
-#define READ_RETRY 2
-#define READ_ERR 3
-
-enum rbio_context {
- RBIO_CONTEXT_NULL,
- RBIO_CONTEXT_HIGHPRI,
- RBIO_CONTEXT_UNBOUND,
-};
-
-static inline struct bch_read_bio *
-bch2_rbio_parent(struct bch_read_bio *rbio)
-{
- return rbio->split ? rbio->parent : rbio;
-}
-
-__always_inline
-static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
- enum rbio_context context,
- struct workqueue_struct *wq)
-{
- if (context <= rbio->context) {
- fn(&rbio->work);
- } else {
- rbio->work.func = fn;
- rbio->context = context;
- queue_work(wq, &rbio->work);
- }
-}
-
-static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
-{
- BUG_ON(rbio->bounce && !rbio->split);
-
- if (rbio->promote)
- promote_free(rbio->c, rbio->promote);
- rbio->promote = NULL;
-
- if (rbio->bounce)
- bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
-
- if (rbio->split) {
- struct bch_read_bio *parent = rbio->parent;
-
- if (rbio->kmalloc)
- kfree(rbio);
- else
- bio_put(&rbio->bio);
-
- rbio = parent;
- }
-
- return rbio;
-}
-
-/*
- * Only called on a top level bch_read_bio to complete an entire read request,
- * not a split:
- */
-static void bch2_rbio_done(struct bch_read_bio *rbio)
-{
- if (rbio->start_time)
- bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
- rbio->start_time);
- bio_endio(&rbio->bio);
-}
-
-static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
- struct bvec_iter bvec_iter,
- struct bch_io_failures *failed,
- unsigned flags)
-{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_buf sk;
- struct bkey_s_c k;
- int ret;
-
- flags &= ~BCH_READ_LAST_FRAGMENT;
- flags |= BCH_READ_MUST_CLONE;
-
- bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, 0, 0);
-
- bch2_trans_iter_init(&trans, &iter, rbio->data_btree,
- rbio->read_pos, BTREE_ITER_SLOTS);
-retry:
- rbio->bio.bi_status = 0;
-
- k = bch2_btree_iter_peek_slot(&iter);
- if (bkey_err(k))
- goto err;
-
- bch2_bkey_buf_reassemble(&sk, c, k);
- k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(&trans);
-
- if (!bch2_bkey_matches_ptr(c, k,
- rbio->pick.ptr,
- rbio->data_pos.offset -
- rbio->pick.crc.offset)) {
- /* extent we wanted to read no longer exists: */
- rbio->hole = true;
- goto out;
- }
-
- ret = __bch2_read_extent(&trans, rbio, bvec_iter,
- rbio->read_pos,
- rbio->data_btree,
- k, 0, failed, flags);
- if (ret == READ_RETRY)
- goto retry;
- if (ret)
- goto err;
-out:
- bch2_rbio_done(rbio);
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&sk, c);
- return;
-err:
- rbio->bio.bi_status = BLK_STS_IOERR;
- goto out;
-}
-
-static void bch2_rbio_retry(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bch_fs *c = rbio->c;
- struct bvec_iter iter = rbio->bvec_iter;
- unsigned flags = rbio->flags;
- subvol_inum inum = {
- .subvol = rbio->subvol,
- .inum = rbio->read_pos.inode,
- };
- struct bch_io_failures failed = { .nr = 0 };
-
- trace_and_count(c, read_retry, &rbio->bio);
-
- if (rbio->retry == READ_RETRY_AVOID)
- bch2_mark_io_failure(&failed, &rbio->pick);
-
- rbio->bio.bi_status = 0;
-
- rbio = bch2_rbio_free(rbio);
-
- flags |= BCH_READ_IN_RETRY;
- flags &= ~BCH_READ_MAY_PROMOTE;
-
- if (flags & BCH_READ_NODECODE) {
- bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
- } else {
- flags &= ~BCH_READ_LAST_FRAGMENT;
- flags |= BCH_READ_MUST_CLONE;
-
- __bch2_read(c, rbio, iter, inum, &failed, flags);
- }
-}
-
-static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
- blk_status_t error)
-{
- rbio->retry = retry;
-
- if (rbio->flags & BCH_READ_IN_RETRY)
- return;
-
- if (retry == READ_ERR) {
- rbio = bch2_rbio_free(rbio);
-
- rbio->bio.bi_status = error;
- bch2_rbio_done(rbio);
- } else {
- bch2_rbio_punt(rbio, bch2_rbio_retry,
- RBIO_CONTEXT_UNBOUND, system_unbound_wq);
- }
-}
-
-static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
- struct bch_read_bio *rbio)
-{
- struct bch_fs *c = rbio->c;
- u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
- struct bch_extent_crc_unpacked new_crc;
- struct btree_iter iter;
- struct bkey_i *new;
- struct bkey_s_c k;
- int ret = 0;
-
- if (crc_is_compressed(rbio->pick.crc))
- return 0;
-
- k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- if ((ret = bkey_err(k)))
- goto out;
-
- if (bversion_cmp(k.k->version, rbio->version) ||
- !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
- goto out;
-
- /* Extent was merged? */
- if (bkey_start_offset(k.k) < data_offset ||
- k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
- goto out;
-
- if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
- rbio->pick.crc, NULL, &new_crc,
- bkey_start_offset(k.k) - data_offset, k.k->size,
- rbio->pick.crc.csum_type)) {
- bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
- ret = 0;
- goto out;
- }
-
- /*
- * going to be temporarily appending another checksum entry:
- */
- new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
- sizeof(struct bch_extent_crc128));
- if ((ret = PTR_ERR_OR_ZERO(new)))
- goto out;
-
- bkey_reassemble(new, k);
-
- if (!bch2_bkey_narrow_crcs(new, new_crc))
- goto out;
-
- ret = bch2_trans_update(trans, &iter, new,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-out:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
-{
- bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL,
- __bch2_rbio_narrow_crcs(&trans, rbio));
-}
-
-/* Inner part that may run in process context */
-static void __bch2_read_endio(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bch_fs *c = rbio->c;
- struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
- struct bio *src = &rbio->bio;
- struct bio *dst = &bch2_rbio_parent(rbio)->bio;
- struct bvec_iter dst_iter = rbio->bvec_iter;
- struct bch_extent_crc_unpacked crc = rbio->pick.crc;
- struct nonce nonce = extent_nonce(rbio->version, crc);
- unsigned nofs_flags;
- struct bch_csum csum;
- int ret;
-
- nofs_flags = memalloc_nofs_save();
-
- /* Reset iterator for checksumming and copying bounced data: */
- if (rbio->bounce) {
- src->bi_iter.bi_size = crc.compressed_size << 9;
- src->bi_iter.bi_idx = 0;
- src->bi_iter.bi_bvec_done = 0;
- } else {
- src->bi_iter = rbio->bvec_iter;
- }
-
- csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
- if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
- goto csum_err;
-
- /*
- * XXX
- * We need to rework the narrow_crcs path to deliver the read completion
- * first, and then punt to a different workqueue, otherwise we're
- * holding up reads while doing btree updates which is bad for memory
- * reclaim.
- */
- if (unlikely(rbio->narrow_crcs))
- bch2_rbio_narrow_crcs(rbio);
-
- if (rbio->flags & BCH_READ_NODECODE)
- goto nodecode;
-
- /* Adjust crc to point to subset of data we want: */
- crc.offset += rbio->offset_into_extent;
- crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
-
- if (crc_is_compressed(crc)) {
- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
- if (ret)
- goto decrypt_err;
-
- if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
- !c->opts.no_data_io)
- goto decompression_err;
- } else {
- /* don't need to decrypt the entire bio: */
- nonce = nonce_add(nonce, crc.offset << 9);
- bio_advance(src, crc.offset << 9);
-
- BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
- src->bi_iter.bi_size = dst_iter.bi_size;
-
- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
- if (ret)
- goto decrypt_err;
-
- if (rbio->bounce) {
- struct bvec_iter src_iter = src->bi_iter;
-
- bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
- }
- }
-
- if (rbio->promote) {
- /*
- * Re encrypt data we decrypted, so it's consistent with
- * rbio->crc:
- */
- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
- if (ret)
- goto decrypt_err;
-
- promote_start(rbio->promote, rbio);
- rbio->promote = NULL;
- }
-nodecode:
- if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
- rbio = bch2_rbio_free(rbio);
- bch2_rbio_done(rbio);
- }
-out:
- memalloc_nofs_restore(nofs_flags);
- return;
-csum_err:
- /*
- * Checksum error: if the bio wasn't bounced, we may have been
- * reading into buffers owned by userspace (that userspace can
- * scribble over) - retry the read, bouncing it this time:
- */
- if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
- rbio->flags |= BCH_READ_MUST_BOUNCE;
- bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
- goto out;
- }
-
- bch_err_inum_offset_ratelimited(ca,
- rbio->read_pos.inode,
- rbio->read_pos.offset << 9,
- "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
- rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
- csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
- bch2_io_error(ca);
- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
- goto out;
-decompression_err:
- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
- rbio->read_pos.offset << 9,
- "decompression error");
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
- goto out;
-decrypt_err:
- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
- rbio->read_pos.offset << 9,
- "decrypt error");
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
- goto out;
-}
-
-static void bch2_read_endio(struct bio *bio)
-{
- struct bch_read_bio *rbio =
- container_of(bio, struct bch_read_bio, bio);
- struct bch_fs *c = rbio->c;
- struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
- struct workqueue_struct *wq = NULL;
- enum rbio_context context = RBIO_CONTEXT_NULL;
-
- if (rbio->have_ioref) {
- bch2_latency_acct(ca, rbio->submit_time, READ);
- percpu_ref_put(&ca->io_ref);
- }
-
- if (!rbio->split)
- rbio->bio.bi_end_io = rbio->end_io;
-
- if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
- rbio->read_pos.inode,
- rbio->read_pos.offset,
- "data read error: %s",
- bch2_blk_status_to_str(bio->bi_status))) {
- bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
- return;
- }
-
- if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
- ptr_stale(ca, &rbio->pick.ptr)) {
- trace_and_count(c, read_reuse_race, &rbio->bio);
-
- if (rbio->flags & BCH_READ_RETRY_IF_STALE)
- bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
- else
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
- return;
- }
-
- if (rbio->narrow_crcs ||
- rbio->promote ||
- crc_is_compressed(rbio->pick.crc) ||
- bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
- context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
- else if (rbio->pick.crc.csum_type)
- context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;
-
- bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
-}
-
-int __bch2_read_indirect_extent(struct btree_trans *trans,
- unsigned *offset_into_extent,
- struct bkey_buf *orig_k)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- u64 reflink_offset;
- int ret;
-
- reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
- *offset_into_extent;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
- POS(0, reflink_offset), 0);
- ret = bkey_err(k);
- if (ret)
- goto err;
-
- if (k.k->type != KEY_TYPE_reflink_v &&
- k.k->type != KEY_TYPE_indirect_inline_data) {
- bch_err_inum_offset_ratelimited(trans->c,
- orig_k->k->k.p.inode,
- orig_k->k->k.p.offset << 9,
- "%llu len %u points to nonexistent indirect extent %llu",
- orig_k->k->k.p.offset,
- orig_k->k->k.size,
- reflink_offset);
- bch2_inconsistent_error(trans->c);
- ret = -EIO;
- goto err;
- }
-
- *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
- bch2_bkey_buf_reassemble(orig_k, trans->c, k);
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
- struct bkey_s_c k,
- struct bch_extent_ptr ptr)
-{
- struct bch_fs *c = trans->c;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
- struct btree_iter iter;
- struct printbuf buf = PRINTBUF;
- int ret;
-
- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
- PTR_BUCKET_POS(c, &ptr),
- BTREE_ITER_CACHED);
-
- prt_printf(&buf, "Attempting to read from stale dirty pointer:");
- printbuf_indent_add(&buf, 2);
- prt_newline(&buf);
-
- bch2_bkey_val_to_text(&buf, c, k);
- prt_newline(&buf);
-
- prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
-
- ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
- if (!ret) {
- prt_newline(&buf);
- bch2_bkey_val_to_text(&buf, c, k);
- }
-
- bch2_fs_inconsistent(c, "%s", buf.buf);
-
- bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
-}
-
-int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
- struct bvec_iter iter, struct bpos read_pos,
- enum btree_id data_btree, struct bkey_s_c k,
- unsigned offset_into_extent,
- struct bch_io_failures *failed, unsigned flags)
-{
- struct bch_fs *c = trans->c;
- struct extent_ptr_decoded pick;
- struct bch_read_bio *rbio = NULL;
- struct bch_dev *ca = NULL;
- struct promote_op *promote = NULL;
- bool bounce = false, read_full = false, narrow_crcs = false;
- struct bpos data_pos = bkey_start_pos(k.k);
- int pick_ret;
-
- if (bkey_extent_is_inline_data(k.k)) {
- unsigned bytes = min_t(unsigned, iter.bi_size,
- bkey_inline_data_bytes(k.k));
-
- swap(iter.bi_size, bytes);
- memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
- swap(iter.bi_size, bytes);
- bio_advance_iter(&orig->bio, &iter, bytes);
- zero_fill_bio_iter(&orig->bio, iter);
- goto out_read_done;
- }
-retry_pick:
- pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
-
- /* hole or reservation - just zero fill: */
- if (!pick_ret)
- goto hole;
-
- if (pick_ret < 0) {
- bch_err_inum_offset_ratelimited(c,
- read_pos.inode, read_pos.offset << 9,
- "no device to read from");
- goto err;
- }
-
- ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-
- /*
- * Stale dirty pointers are treated as IO errors, but @failed isn't
- * allocated unless we're in the retry path - so if we're not in the
- * retry path, don't check here, it'll be caught in bch2_read_endio()
- * and we'll end up in the retry path:
- */
- if ((flags & BCH_READ_IN_RETRY) &&
- !pick.ptr.cached &&
- unlikely(ptr_stale(ca, &pick.ptr))) {
- read_from_stale_dirty_pointer(trans, k, pick.ptr);
- bch2_mark_io_failure(failed, &pick);
- goto retry_pick;
- }
-
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(trans);
-
- if (flags & BCH_READ_NODECODE) {
- /*
- * can happen if we retry, and the extent we were going to read
- * has been merged in the meantime:
- */
- if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
- goto hole;
-
- iter.bi_size = pick.crc.compressed_size << 9;
- goto get_bio;
- }
-
- if (!(flags & BCH_READ_LAST_FRAGMENT) ||
- bio_flagged(&orig->bio, BIO_CHAIN))
- flags |= BCH_READ_MUST_CLONE;
-
- narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
- bch2_can_narrow_extent_crcs(k, pick.crc);
-
- if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
- flags |= BCH_READ_MUST_BOUNCE;
-
- EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-
- if (crc_is_compressed(pick.crc) ||
- (pick.crc.csum_type != BCH_CSUM_none &&
- (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
- (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
- (flags & BCH_READ_USER_MAPPED)) ||
- (flags & BCH_READ_MUST_BOUNCE)))) {
- read_full = true;
- bounce = true;
- }
-
- if (orig->opts.promote_target)
- promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
- &rbio, &bounce, &read_full);
-
- if (!read_full) {
- EBUG_ON(crc_is_compressed(pick.crc));
- EBUG_ON(pick.crc.csum_type &&
- (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
- bvec_iter_sectors(iter) != pick.crc.live_size ||
- pick.crc.offset ||
- offset_into_extent));
-
- data_pos.offset += offset_into_extent;
- pick.ptr.offset += pick.crc.offset +
- offset_into_extent;
- offset_into_extent = 0;
- pick.crc.compressed_size = bvec_iter_sectors(iter);
- pick.crc.uncompressed_size = bvec_iter_sectors(iter);
- pick.crc.offset = 0;
- pick.crc.live_size = bvec_iter_sectors(iter);
- offset_into_extent = 0;
- }
-get_bio:
- if (rbio) {
- /*
- * promote already allocated bounce rbio:
- * promote needs to allocate a bio big enough for uncompressing
- * data in the write path, but we're not going to use it all
- * here:
- */
- EBUG_ON(rbio->bio.bi_iter.bi_size <
- pick.crc.compressed_size << 9);
- rbio->bio.bi_iter.bi_size =
- pick.crc.compressed_size << 9;
- } else if (bounce) {
- unsigned sectors = pick.crc.compressed_size;
-
- rbio = rbio_init(bio_alloc_bioset(NULL,
- DIV_ROUND_UP(sectors, PAGE_SECTORS),
- 0,
- GFP_NOFS,
- &c->bio_read_split),
- orig->opts);
-
- bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
- rbio->bounce = true;
- rbio->split = true;
- } else if (flags & BCH_READ_MUST_CLONE) {
- /*
- * Have to clone if there were any splits, due to error
- * reporting issues (if a split errored, and retrying didn't
- * work, when it reports the error to its parent (us) we don't
- * know if the error was from our bio, and we should retry, or
- * from the whole bio, in which case we don't want to retry and
- * lose the error)
- */
- rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
- &c->bio_read_split),
- orig->opts);
- rbio->bio.bi_iter = iter;
- rbio->split = true;
- } else {
- rbio = orig;
- rbio->bio.bi_iter = iter;
- EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
- }
-
- EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
-
- rbio->c = c;
- rbio->submit_time = local_clock();
- if (rbio->split)
- rbio->parent = orig;
- else
- rbio->end_io = orig->bio.bi_end_io;
- rbio->bvec_iter = iter;
- rbio->offset_into_extent= offset_into_extent;
- rbio->flags = flags;
- rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
- rbio->narrow_crcs = narrow_crcs;
- rbio->hole = 0;
- rbio->retry = 0;
- rbio->context = 0;
- /* XXX: only initialize this if needed */
- rbio->devs_have = bch2_bkey_devs(k);
- rbio->pick = pick;
- rbio->subvol = orig->subvol;
- rbio->read_pos = read_pos;
- rbio->data_btree = data_btree;
- rbio->data_pos = data_pos;
- rbio->version = k.k->version;
- rbio->promote = promote;
- INIT_WORK(&rbio->work, NULL);
-
- rbio->bio.bi_opf = orig->bio.bi_opf;
- rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
- rbio->bio.bi_end_io = bch2_read_endio;
-
- if (rbio->bounce)
- trace_and_count(c, read_bounce, &rbio->bio);
-
- this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
- bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
-
- /*
- * If it's being moved internally, we don't want to flag it as a cache
- * hit:
- */
- if (pick.ptr.cached && !(flags & BCH_READ_NODECODE))
- bch2_bucket_io_time_reset(trans, pick.ptr.dev,
- PTR_BUCKET_NR(ca, &pick.ptr), READ);
-
- if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
- bio_inc_remaining(&orig->bio);
- trace_and_count(c, read_split, &orig->bio);
- }
-
- if (!rbio->pick.idx) {
- if (!rbio->have_ioref) {
- bch_err_inum_offset_ratelimited(c,
- read_pos.inode,
- read_pos.offset << 9,
- "no device to read from");
- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
- goto out;
- }
-
- this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
- bio_sectors(&rbio->bio));
- bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
-
- if (unlikely(c->opts.no_data_io)) {
- if (likely(!(flags & BCH_READ_IN_RETRY)))
- bio_endio(&rbio->bio);
- } else {
- if (likely(!(flags & BCH_READ_IN_RETRY)))
- submit_bio(&rbio->bio);
- else
- submit_bio_wait(&rbio->bio);
- }
-
- /*
- * We just submitted IO which may block, we expect relock fail
- * events and shouldn't count them:
- */
- trans->notrace_relock_fail = true;
- } else {
- /* Attempting reconstruct read: */
- if (bch2_ec_read_extent(c, rbio)) {
- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
- goto out;
- }
-
- if (likely(!(flags & BCH_READ_IN_RETRY)))
- bio_endio(&rbio->bio);
- }
-out:
- if (likely(!(flags & BCH_READ_IN_RETRY))) {
- return 0;
- } else {
- int ret;
-
- rbio->context = RBIO_CONTEXT_UNBOUND;
- bch2_read_endio(&rbio->bio);
-
- ret = rbio->retry;
- rbio = bch2_rbio_free(rbio);
-
- if (ret == READ_RETRY_AVOID) {
- bch2_mark_io_failure(failed, &pick);
- ret = READ_RETRY;
- }
-
- if (!ret)
- goto out_read_done;
-
- return ret;
- }
-
-err:
- if (flags & BCH_READ_IN_RETRY)
- return READ_ERR;
-
- orig->bio.bi_status = BLK_STS_IOERR;
- goto out_read_done;
-
-hole:
- /*
- * won't normally happen in the BCH_READ_NODECODE
- * (bch2_move_extent()) path, but if we retry and the extent we wanted
- * to read no longer exists we have to signal that:
- */
- if (flags & BCH_READ_NODECODE)
- orig->hole = true;
-
- zero_fill_bio_iter(&orig->bio, iter);
-out_read_done:
- if (flags & BCH_READ_LAST_FRAGMENT)
- bch2_rbio_done(orig);
- return 0;
-}
-
-void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
- struct bvec_iter bvec_iter, subvol_inum inum,
- struct bch_io_failures *failed, unsigned flags)
-{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_buf sk;
- struct bkey_s_c k;
- u32 snapshot;
- int ret;
-
- BUG_ON(flags & BCH_READ_NODECODE);
-
- bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, 0, 0);
-retry:
- bch2_trans_begin(&trans);
- iter = (struct btree_iter) { NULL };
-
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
- if (ret)
- goto err;
-
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
- SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
- BTREE_ITER_SLOTS);
- while (1) {
- unsigned bytes, sectors, offset_into_extent;
- enum btree_id data_btree = BTREE_ID_extents;
-
- /*
- * read_extent -> io_time_reset may cause a transaction restart
- * without returning an error, we need to check for that here:
- */
- ret = bch2_trans_relock(&trans);
- if (ret)
- break;
-
- bch2_btree_iter_set_pos(&iter,
- POS(inum.inum, bvec_iter.bi_sector));
-
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
- if (ret)
- break;
-
- offset_into_extent = iter.pos.offset -
- bkey_start_offset(k.k);
- sectors = k.k->size - offset_into_extent;
-
- bch2_bkey_buf_reassemble(&sk, c, k);
-
- ret = bch2_read_indirect_extent(&trans, &data_btree,
- &offset_into_extent, &sk);
- if (ret)
- break;
-
- k = bkey_i_to_s_c(sk.k);
-
- /*
- * With indirect extents, the amount of data to read is the min
- * of the original extent and the indirect extent:
- */
- sectors = min(sectors, k.k->size - offset_into_extent);
-
- bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
- swap(bvec_iter.bi_size, bytes);
-
- if (bvec_iter.bi_size == bytes)
- flags |= BCH_READ_LAST_FRAGMENT;
-
- ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos,
- data_btree, k,
- offset_into_extent, failed, flags);
- if (ret)
- break;
-
- if (flags & BCH_READ_LAST_FRAGMENT)
- break;
-
- swap(bvec_iter.bi_size, bytes);
- bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
-
- ret = btree_trans_too_many_iters(&trans);
- if (ret)
- break;
- }
-err:
- bch2_trans_iter_exit(&trans, &iter);
-
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
- ret == READ_RETRY ||
- ret == READ_RETRY_AVOID)
- goto retry;
-
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&sk, c);
-
- if (ret) {
- bch_err_inum_offset_ratelimited(c, inum.inum,
- bvec_iter.bi_sector << 9,
- "read error %i from btree lookup", ret);
- rbio->bio.bi_status = BLK_STS_IOERR;
- bch2_rbio_done(rbio);
- }
-}
-
-void bch2_fs_io_exit(struct bch_fs *c)
-{
- if (c->promote_table.tbl)
- rhashtable_destroy(&c->promote_table);
mempool_exit(&c->bio_bounce_pages);
bioset_exit(&c->bio_write);
- bioset_exit(&c->bio_read_split);
- bioset_exit(&c->bio_read);
}
-int bch2_fs_io_init(struct bch_fs *c)
+int bch2_fs_io_write_init(struct bch_fs *c)
{
- if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
- BIOSET_NEED_BVECS))
- return -BCH_ERR_ENOMEM_bio_read_init;
-
- if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
- BIOSET_NEED_BVECS))
- return -BCH_ERR_ENOMEM_bio_read_split_init;
-
if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
BIOSET_NEED_BVECS))
return -BCH_ERR_ENOMEM_bio_write_init;
@@ -3044,8 +1665,5 @@ int bch2_fs_io_init(struct bch_fs *c)
PAGE_SIZE, 0))
return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
- if (rhashtable_init(&c->promote_table, &bch_promote_params))
- return -BCH_ERR_ENOMEM_promote_table_init;
-
return 0;
}
diff --git a/libbcachefs/io_write.h b/libbcachefs/io_write.h
new file mode 100644
index 0000000..9323167
--- /dev/null
+++ b/libbcachefs/io_write.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_WRITE_H
+#define _BCACHEFS_IO_WRITE_H
+
+#include "checksum.h"
+#include "io_write_types.h"
+
+#define to_wbio(_bio) \
+ container_of((_bio), struct bch_write_bio, bio)
+
+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+void bch2_latency_acct(struct bch_dev *, u64, int);
+#else
+static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
+#endif
+
+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
+ enum bch_data_type, const struct bkey_i *, bool);
+
+#define BCH_WRITE_FLAGS() \
+ x(ALLOC_NOWAIT) \
+ x(CACHED) \
+ x(DATA_ENCODED) \
+ x(PAGES_STABLE) \
+ x(PAGES_OWNED) \
+ x(ONLY_SPECIFIED_DEVS) \
+ x(WROTE_DATA_INLINE) \
+ x(FROM_INTERNAL) \
+ x(CHECK_ENOSPC) \
+ x(SYNC) \
+ x(MOVE) \
+ x(IN_WORKER) \
+ x(DONE) \
+ x(IO_ERROR) \
+ x(CONVERT_UNWRITTEN)
+
+enum __bch_write_flags {
+#define x(f) __BCH_WRITE_##f,
+ BCH_WRITE_FLAGS()
+#undef x
+};
+
+enum bch_write_flags {
+#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
+ BCH_WRITE_FLAGS()
+#undef x
+};
+
+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
+{
+ return op->watermark == BCH_WATERMARK_copygc
+ ? op->c->copygc_wq
+ : op->c->btree_update_wq;
+}
+
+int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, bool *, s64 *, s64 *);
+int bch2_extent_update(struct btree_trans *, subvol_inum,
+ struct btree_iter *, struct bkey_i *,
+ struct disk_reservation *, u64, s64 *, bool);
+
+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
+ struct bch_io_opts opts)
+{
+ op->c = c;
+ op->end_io = NULL;
+ op->flags = 0;
+ op->written = 0;
+ op->error = 0;
+ op->csum_type = bch2_data_checksum_type(c, opts);
+ op->compression_opt = opts.compression;
+ op->nr_replicas = 0;
+ op->nr_replicas_required = c->opts.data_replicas_required;
+ op->watermark = BCH_WATERMARK_normal;
+ op->incompressible = 0;
+ op->open_buckets.nr = 0;
+ op->devs_have.nr = 0;
+ op->target = 0;
+ op->opts = opts;
+ op->subvol = 0;
+ op->pos = POS_MAX;
+ op->version = ZERO_VERSION;
+ op->write_point = (struct write_point_specifier) { 0 };
+ op->res = (struct disk_reservation) { 0 };
+ op->new_i_size = U64_MAX;
+ op->i_sectors_delta = 0;
+ op->devs_need_flush = NULL;
+}
+
+void bch2_write(struct closure *);
+
+void bch2_write_point_do_index_updates(struct work_struct *);
+
+static inline struct bch_write_bio *wbio_init(struct bio *bio)
+{
+ struct bch_write_bio *wbio = to_wbio(bio);
+
+ memset(&wbio->wbio, 0, sizeof(wbio->wbio));
+ return wbio;
+}
+
+void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
+
+void bch2_fs_io_write_exit(struct bch_fs *);
+int bch2_fs_io_write_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_IO_WRITE_H */
diff --git a/libbcachefs/io_types.h b/libbcachefs/io_write_types.h
index 737f16d..c7f97c2 100644
--- a/libbcachefs/io_types.h
+++ b/libbcachefs/io_write_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_IO_TYPES_H
-#define _BCACHEFS_IO_TYPES_H
+#ifndef _BCACHEFS_IO_WRITE_TYPES_H
+#define _BCACHEFS_IO_WRITE_TYPES_H
#include "alloc_types.h"
#include "btree_types.h"
@@ -13,75 +13,6 @@
#include <linux/llist.h>
#include <linux/workqueue.h>
-struct bch_read_bio {
- struct bch_fs *c;
- u64 start_time;
- u64 submit_time;
-
- /*
- * Reads will often have to be split, and if the extent being read from
- * was checksummed or compressed we'll also have to allocate bounce
- * buffers and copy the data back into the original bio.
- *
- * If we didn't have to split, we have to save and restore the original
- * bi_end_io - @split below indicates which:
- */
- union {
- struct bch_read_bio *parent;
- bio_end_io_t *end_io;
- };
-
- /*
- * Saved copy of bio->bi_iter, from submission time - allows us to
- * resubmit on IO error, and also to copy data back to the original bio
- * when we're bouncing:
- */
- struct bvec_iter bvec_iter;
-
- unsigned offset_into_extent;
-
- u16 flags;
- union {
- struct {
- u16 bounce:1,
- split:1,
- kmalloc:1,
- have_ioref:1,
- narrow_crcs:1,
- hole:1,
- retry:2,
- context:2;
- };
- u16 _state;
- };
-
- struct bch_devs_list devs_have;
-
- struct extent_ptr_decoded pick;
-
- /*
- * pos we read from - different from data_pos for indirect extents:
- */
- u32 subvol;
- struct bpos read_pos;
-
- /*
- * start pos of data we read (may not be pos of data we want) - for
- * promote, narrow extents paths:
- */
- enum btree_id data_btree;
- struct bpos data_pos;
- struct bversion version;
-
- struct promote_op *promote;
-
- struct bch_io_opts opts;
-
- struct work_struct work;
-
- struct bio bio;
-};
-
struct bch_write_bio {
struct_group(wbio,
struct bch_fs *c;
@@ -162,4 +93,4 @@ struct bch_write_op {
struct bch_write_bio wbio;
};
-#endif /* _BCACHEFS_IO_TYPES_H */
+#endif /* _BCACHEFS_IO_WRITE_TYPES_H */
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 055920c..fc3dd5b 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -132,13 +132,21 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
return stuck;
}
-/* journal entry close/open: */
-
-void __bch2_journal_buf_put(struct journal *j)
+/*
+ * Final processing when the last reference of a journal buffer has been
+ * dropped. Drop the pin list reference acquired at journal entry open and write
+ * the buffer, if requested.
+ */
+void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
+ lockdep_assert_held(&j->lock);
+
+ if (__bch2_journal_pin_put(j, seq))
+ bch2_journal_reclaim_fast(j);
+ if (write)
+ closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
}
/*
@@ -204,13 +212,11 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val)
buf->data->last_seq = cpu_to_le64(buf->last_seq);
BUG_ON(buf->last_seq > le64_to_cpu(buf->data->seq));
- __bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
-
cancel_delayed_work(&j->write_work);
bch2_journal_space_available(j);
- bch2_journal_buf_put(j, old.idx);
+ __bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq));
}
void bch2_journal_halt(struct journal *j)
@@ -588,8 +594,13 @@ out:
/**
* bch2_journal_flush_seq_async - wait for a journal entry to be written
+ * @j: journal object
+ * @seq: seq to flush
+ * @parent: closure object to wait with
+ * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed,
+ * -EIO if @seq will never be flushed
*
- * like bch2_journal_wait_on_seq, except that it triggers a write immediately if
+ * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if
* necessary
*/
int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
@@ -829,12 +840,12 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
break;
ret = bch2_trans_run(c,
- bch2_trans_mark_metadata_bucket(&trans, ca,
+ bch2_trans_mark_metadata_bucket(trans, ca,
ob[nr_got]->bucket, BCH_DATA_journal,
ca->mi.bucket_size));
if (ret) {
bch2_open_bucket_put(c, ob[nr_got]);
- bch_err(c, "error marking new journal buckets: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "marking new journal buckets");
break;
}
@@ -910,7 +921,7 @@ err_unblock:
if (ret && !new_fs)
for (i = 0; i < nr_got; i++)
bch2_trans_run(c,
- bch2_trans_mark_metadata_bucket(&trans, ca,
+ bch2_trans_mark_metadata_bucket(trans, ca,
bu[i], BCH_DATA_free, 0));
err_free:
if (!new_fs)
@@ -944,7 +955,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
goto unlock;
while (ja->nr < nr) {
- struct disk_reservation disk_res = { 0, 0 };
+ struct disk_reservation disk_res = { 0, 0, 0 };
/*
* note: journal buckets aren't really counted as _sectors_ used yet, so
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 008a2e2..491133c 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -252,9 +252,10 @@ static inline bool journal_entry_empty(struct jset *j)
return true;
}
-void __bch2_journal_buf_put(struct journal *);
-
-static inline void bch2_journal_buf_put(struct journal *j, unsigned idx)
+/*
+ * Drop reference on a buffer index and return true if the count has hit zero.
+ */
+static inline union journal_res_state journal_state_buf_put(struct journal *j, unsigned idx)
{
union journal_res_state s;
@@ -264,9 +265,30 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx)
.buf2_count = idx == 2,
.buf3_count = idx == 3,
}).v, &j->reservations.counter);
+ return s;
+}
+
+void bch2_journal_buf_put_final(struct journal *, u64, bool);
+
+static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
+{
+ union journal_res_state s;
+
+ s = journal_state_buf_put(j, idx);
+ if (!journal_state_count(s, idx))
+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
+}
- if (!journal_state_count(s, idx) && idx == s.unwritten_idx)
- __bch2_journal_buf_put(j);
+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
+{
+ union journal_res_state s;
+
+ s = journal_state_buf_put(j, idx);
+ if (!journal_state_count(s, idx)) {
+ spin_lock(&j->lock);
+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
+ spin_unlock(&j->lock);
+ }
}
/*
@@ -286,7 +308,7 @@ static inline void bch2_journal_res_put(struct journal *j,
BCH_JSET_ENTRY_btree_keys,
0, 0, 0);
- bch2_journal_buf_put(j, res->idx);
+ bch2_journal_buf_put(j, res->idx, res->seq);
res->ref = 0;
}
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 34740dc..6a3d6a3 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -8,7 +8,6 @@
#include "checksum.h"
#include "disk_groups.h"
#include "error.h"
-#include "io.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@@ -238,17 +237,17 @@ static void journal_entry_err_msg(struct printbuf *out,
#define journal_entry_err(c, version, jset, entry, msg, ...) \
({ \
- struct printbuf buf = PRINTBUF; \
+ struct printbuf _buf = PRINTBUF; \
\
- journal_entry_err_msg(&buf, version, jset, entry); \
- prt_printf(&buf, msg, ##__VA_ARGS__); \
+ journal_entry_err_msg(&_buf, version, jset, entry); \
+ prt_printf(&_buf, msg, ##__VA_ARGS__); \
\
switch (flags & BKEY_INVALID_WRITE) { \
case READ: \
- mustfix_fsck_err(c, "%s", buf.buf); \
+ mustfix_fsck_err(c, "%s", _buf.buf); \
break; \
case WRITE: \
- bch_err(c, "corrupt metadata before write: %s\n", buf.buf);\
+ bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\
if (bch2_fs_inconsistent(c)) { \
ret = -BCH_ERR_fsck_errors_not_fixed; \
goto fsck_err; \
@@ -256,7 +255,7 @@ static void journal_entry_err_msg(struct printbuf *out,
break; \
} \
\
- printbuf_exit(&buf); \
+ printbuf_exit(&_buf); \
true; \
})
@@ -1282,7 +1281,7 @@ int bch2_journal_read(struct bch_fs *c,
continue;
for (ptr = 0; ptr < i->nr_ptrs; ptr++) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
+ ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
if (!i->ptrs[ptr].csum_good)
bch_err_dev_offset(ca, i->ptrs[ptr].sector,
@@ -1380,16 +1379,21 @@ static void __journal_write_alloc(struct journal *j,
}
/**
- * journal_next_bucket - move on to the next journal bucket if possible
+ * journal_write_alloc - decide where to write next journal entry
+ *
+ * @j: journal object
+ * @w: journal buf (entry to be written)
+ *
+ * Returns: 0 on success, or -EROFS on failure
*/
-static int journal_write_alloc(struct journal *j, struct journal_buf *w,
- unsigned sectors)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_devs_mask devs;
struct journal_device *ja;
struct bch_dev *ca;
struct dev_alloc_list devs_sorted;
+ unsigned sectors = vstruct_sectors(w->data, c->block_bits);
unsigned target = c->opts.metadata_target ?:
c->opts.foreground_target;
unsigned i, replicas = 0, replicas_want =
@@ -1550,6 +1554,7 @@ static void journal_write_done(struct closure *cl)
if (!journal_state_count(new, new.unwritten_idx) &&
journal_last_unwritten_seq(j) <= journal_cur_seq(j)) {
+ spin_unlock(&j->lock);
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
} else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
@@ -1562,10 +1567,11 @@ static void journal_write_done(struct closure *cl)
* might want to be written now:
*/
+ spin_unlock(&j->lock);
mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta));
+ } else {
+ spin_unlock(&j->lock);
}
-
- spin_unlock(&j->lock);
}
static void journal_write_endio(struct bio *bio)
@@ -1813,7 +1819,7 @@ void bch2_journal_write(struct closure *cl)
retry_alloc:
spin_lock(&j->lock);
- ret = journal_write_alloc(j, w, sectors);
+ ret = journal_write_alloc(j, w);
if (ret && j->can_discard) {
spin_unlock(&j->lock);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index 10e1860..9a584aa 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -290,9 +290,8 @@ void bch2_journal_do_discards(struct journal *j)
* entry, holding it open to ensure it gets replayed during recovery:
*/
-static void bch2_journal_reclaim_fast(struct journal *j)
+void bch2_journal_reclaim_fast(struct journal *j)
{
- struct journal_entry_pin_list temp;
bool popped = false;
lockdep_assert_held(&j->lock);
@@ -303,7 +302,7 @@ static void bch2_journal_reclaim_fast(struct journal *j)
*/
while (!fifo_empty(&j->pin) &&
!atomic_read(&fifo_peek_front(&j->pin).count)) {
- fifo_pop(&j->pin, temp);
+ j->pin.front++;
popped = true;
}
@@ -311,19 +310,16 @@ static void bch2_journal_reclaim_fast(struct journal *j)
bch2_journal_space_available(j);
}
-void __bch2_journal_pin_put(struct journal *j, u64 seq)
+bool __bch2_journal_pin_put(struct journal *j, u64 seq)
{
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
- if (atomic_dec_and_test(&pin_list->count))
- bch2_journal_reclaim_fast(j);
+ return atomic_dec_and_test(&pin_list->count);
}
void bch2_journal_pin_put(struct journal *j, u64 seq)
{
- struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
-
- if (atomic_dec_and_test(&pin_list->count)) {
+ if (__bch2_journal_pin_put(j, seq)) {
spin_lock(&j->lock);
bch2_journal_reclaim_fast(j);
spin_unlock(&j->lock);
@@ -419,6 +415,8 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
/**
* bch2_journal_pin_flush: ensure journal pin callback is no longer running
+ * @j: journal object
+ * @pin: pin to flush
*/
void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
{
@@ -579,7 +577,11 @@ static u64 journal_seq_to_flush(struct journal *j)
}
/**
- * bch2_journal_reclaim - free up journal buckets
+ * __bch2_journal_reclaim - free up journal buckets
+ * @j: journal object
+ * @direct: direct or background reclaim?
+ * @kicked: requested to run since we last ran?
+ * Returns: 0 on success, or -EIO if the journal has been shutdown
*
* Background journal reclaim writes out btree nodes. It should be run
* early enough so that we never completely run out of journal buckets.
@@ -758,7 +760,7 @@ int bch2_journal_reclaim_start(struct journal *j)
"bch-reclaim/%s", c->name);
ret = PTR_ERR_OR_ZERO(p);
if (ret) {
- bch_err(c, "error creating journal reclaim thread: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "creating journal reclaim thread");
return ret;
}
diff --git a/libbcachefs/journal_reclaim.h b/libbcachefs/journal_reclaim.h
index 0fd1af1..494d1a6 100644
--- a/libbcachefs/journal_reclaim.h
+++ b/libbcachefs/journal_reclaim.h
@@ -31,7 +31,8 @@ journal_seq_pin(struct journal *j, u64 seq)
return &j->pin.data[seq & j->pin.mask];
}
-void __bch2_journal_pin_put(struct journal *, u64);
+void bch2_journal_reclaim_fast(struct journal *);
+bool __bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c
index d6b9f2c..1e1a794 100644
--- a/libbcachefs/journal_seq_blacklist.c
+++ b/libbcachefs/journal_seq_blacklist.c
@@ -250,20 +250,18 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
struct journal_seq_blacklist_table *t;
struct bch_sb_field_journal_seq_blacklist *bl;
struct journal_seq_blacklist_entry *src, *dst;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
unsigned i, nr, new_nr;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
for (i = 0; i < BTREE_ID_NR; i++) {
struct btree_iter iter;
struct btree *b;
- bch2_trans_node_iter_init(&trans, &iter, i, POS_MIN,
+ bch2_trans_node_iter_init(trans, &iter, i, POS_MIN,
0, 0, BTREE_ITER_PREFETCH);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
b = bch2_btree_iter_peek_node(&iter);
@@ -275,10 +273,10 @@ retry:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
return;
diff --git a/libbcachefs/logged_ops.c b/libbcachefs/logged_ops.c
new file mode 100644
index 0000000..1bf19aa
--- /dev/null
+++ b/libbcachefs/logged_ops.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "error.h"
+#include "io_misc.h"
+#include "logged_ops.h"
+
+struct bch_logged_op_fn {
+ u8 type;
+ int (*resume)(struct btree_trans *, struct bkey_i *);
+};
+
+static const struct bch_logged_op_fn logged_op_fns[] = {
+#define x(n) { \
+ .type = KEY_TYPE_logged_op_##n, \
+ .resume = bch2_resume_logged_op_##n, \
+},
+ BCH_LOGGED_OPS()
+#undef x
+};
+
+static const struct bch_logged_op_fn *logged_op_fn(enum bch_bkey_type type)
+{
+ for (unsigned i = 0; i < ARRAY_SIZE(logged_op_fns); i++)
+ if (logged_op_fns[i].type == type)
+ return logged_op_fns + i;
+ return NULL;
+}
+
+static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
+ struct bkey_buf sk;
+ u32 restart_count = trans->restart_count;
+ int ret;
+
+ if (!fn)
+ return 0;
+
+ bch2_bkey_buf_init(&sk);
+ bch2_bkey_buf_reassemble(&sk, c, k);
+
+ ret = fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
+
+ bch2_bkey_buf_exit(&sk, c);
+ return ret;
+}
+
+int bch2_resume_logged_ops(struct bch_fs *c)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key2(trans, iter,
+ BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k,
+ resume_logged_op(trans, &iter, k)));
+ if (ret)
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX);
+ if (ret)
+ return ret;
+
+ k->k.p = iter.pos;
+
+ ret = bch2_trans_update(trans, &iter, k, 0);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
+{
+ return commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ __bch2_logged_op_start(trans, k));
+}
+
+void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
+{
+ int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+ bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0));
+ /*
+ * This needs to be a fatal error because we've left an unfinished
+ * operation in the logged ops btree.
+ *
+ * We should only ever see an error here if the filesystem has already
+ * been shut down, but make sure of that here:
+ */
+ if (ret) {
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+ bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
+ __func__, buf.buf, bch2_err_str(ret));
+ printbuf_exit(&buf);
+ }
+}
diff --git a/libbcachefs/logged_ops.h b/libbcachefs/logged_ops.h
new file mode 100644
index 0000000..4d1e786
--- /dev/null
+++ b/libbcachefs/logged_ops.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_LOGGED_OPS_H
+#define _BCACHEFS_LOGGED_OPS_H
+
+#include "bkey.h"
+
+#define BCH_LOGGED_OPS() \
+ x(truncate) \
+ x(finsert)
+
+static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i *op)
+{
+ return bch2_btree_insert_nonextent(trans, BTREE_ID_logged_ops, op, 0);
+}
+
+int bch2_resume_logged_ops(struct bch_fs *);
+int bch2_logged_op_start(struct btree_trans *, struct bkey_i *);
+void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *);
+
+#endif /* _BCACHEFS_LOGGED_OPS_H */
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index 3e8b8f2..215a653 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -151,10 +151,10 @@ int bch2_check_lrus(struct bch_fs *c)
int ret = 0;
ret = bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter,
+ for_each_btree_key_commit(trans, iter,
BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
- bch2_check_lru_key(&trans, &iter, k, &last_flushed_pos)));
+ bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
if (ret)
bch_err_fn(c, ret);
return ret;
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index 81c8cdb..e3a51f6 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -10,7 +10,7 @@
#include "buckets.h"
#include "errcode.h"
#include "extents.h"
-#include "io.h"
+#include "io_write.h"
#include "journal.h"
#include "keylist.h"
#include "migrate.h"
@@ -78,34 +78,32 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
enum btree_id id;
int ret = 0;
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
for (id = 0; id < BTREE_ID_NR; id++) {
if (!btree_type_has_ptrs(id))
continue;
- ret = for_each_btree_key_commit(&trans, iter, id, POS_MIN,
+ ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
- bch2_dev_usrdata_drop_key(&trans, &iter, k, dev_idx, flags));
+ bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags));
if (ret)
break;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct closure cl;
struct btree *b;
@@ -117,16 +115,16 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
if (flags & BCH_FORCE_IF_METADATA_LOST)
return -EINVAL;
+ trans = bch2_trans_get(c);
bch2_bkey_buf_init(&k);
- bch2_trans_init(&trans, c, 0, 0);
closure_init_stack(&cl);
for (id = 0; id < BTREE_ID_NR; id++) {
- bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0,
+ bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
BTREE_ITER_PREFETCH);
retry:
ret = 0;
- while (bch2_trans_begin(&trans),
+ while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
@@ -141,15 +139,14 @@ retry:
break;
}
- ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, 0, false);
+ ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
}
if (ret) {
- bch_err(c, "Error updating btree node key: %s",
- bch2_err_str(ret));
+ bch_err_msg(c, ret, "updating btree node key");
break;
}
next:
@@ -158,7 +155,7 @@ next:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (ret)
goto err;
@@ -167,8 +164,8 @@ next:
bch2_btree_interior_updates_flush(c);
ret = 0;
err:
- bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&k, c);
+ bch2_trans_put(trans);
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index fb76a1d..39a14e3 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -14,7 +14,8 @@
#include "errcode.h"
#include "error.h"
#include "inode.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
@@ -524,7 +525,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
struct bch_fs *c = ctxt->c;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct bkey_buf sk;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct data_update_opts data_opts;
@@ -532,7 +533,6 @@ static int __bch2_move_data(struct moving_context *ctxt,
int ret = 0, ret2;
bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, 0, 0);
if (ctxt->stats) {
ctxt->stats->data_type = BCH_DATA_user;
@@ -540,15 +540,15 @@ static int __bch2_move_data(struct moving_context *ctxt,
ctxt->stats->pos = start;
}
- bch2_trans_iter_init(&trans, &iter, btree_id, start,
+ bch2_trans_iter_init(trans, &iter, btree_id, start,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS);
if (ctxt->rate)
bch2_ratelimit_reset(ctxt->rate);
- while (!move_ratelimit(&trans, ctxt)) {
- bch2_trans_begin(&trans);
+ while (!move_ratelimit(trans, ctxt)) {
+ bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
if (!k.k)
@@ -569,7 +569,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
- ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
+ ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
if (ret)
continue;
@@ -584,7 +584,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL,
+ ret2 = bch2_move_extent(trans, &iter, ctxt, NULL,
io_opts, btree_id, k, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
@@ -592,7 +592,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
- bch2_move_ctxt_wait_for_io(ctxt, &trans);
+ bch2_move_ctxt_wait_for_io(ctxt, trans);
continue;
}
@@ -609,8 +609,8 @@ next_nondata:
bch2_btree_iter_advance(&iter);
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;
@@ -627,7 +627,7 @@ int bch2_move_data(struct bch_fs *c,
{
struct moving_context ctxt;
enum btree_id id;
- int ret;
+ int ret = 0;
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
@@ -723,7 +723,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
if (!bp.level) {
const struct bch_extent_ptr *ptr;
- struct bkey_s_c k;
unsigned i = 0;
k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
@@ -826,15 +825,14 @@ int bch2_evacuate_bucket(struct bch_fs *c,
struct write_point_specifier wp,
bool wait_on_copygc)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct moving_context ctxt;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
- ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts);
+ ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts);
bch2_moving_ctxt_exit(&ctxt);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -851,14 +849,13 @@ static int bch2_move_btree(struct bch_fs *c,
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct btree *b;
enum btree_id id;
struct data_update_opts data_opts;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
progress_list_add(c, stats);
stats->data_type = BCH_DATA_btree;
@@ -871,11 +868,11 @@ static int bch2_move_btree(struct bch_fs *c,
if (!bch2_btree_id_root(c, id)->b)
continue;
- bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0,
+ bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
BTREE_ITER_PREFETCH);
retry:
ret = 0;
- while (bch2_trans_begin(&trans),
+ while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (kthread && kthread_should_stop())
@@ -890,7 +887,7 @@ retry:
if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;
- ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
@@ -901,13 +898,13 @@ next:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (kthread && kthread_should_stop())
break;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index c3136ab..cbdd58d 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_MOVE_H
#define _BCACHEFS_MOVE_H
+#include "bcachefs_ioctl.h"
#include "btree_iter.h"
#include "buckets.h"
#include "data_update.h"
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index 256431a..4017120 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -13,25 +13,17 @@
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
-#include "disk_groups.h"
#include "errcode.h"
#include "error.h"
-#include "extents.h"
-#include "eytzinger.h"
-#include "io.h"
-#include "keylist.h"
#include "lru.h"
#include "move.h"
#include "movinggc.h"
-#include "super-io.h"
#include "trace.h"
-#include <linux/bsearch.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/sched/task.h>
-#include <linux/sort.h>
#include <linux/wait.h>
struct buckets_in_flight {
@@ -156,7 +148,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
- size_t nr_to_get = max(16UL, buckets_in_flight->nr / 4);
+ size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
int ret;
@@ -172,7 +164,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
0, k, ({
struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
- int ret = 0;
+ int ret2 = 0;
saw++;
@@ -181,11 +173,11 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
else if (bucket_in_flight(buckets_in_flight, b.k))
in_flight++;
else {
- ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
- if (ret >= 0)
+ ret2 = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
+ if (ret2 >= 0)
sectors += b.sectors;
}
- ret;
+ ret2;
}));
pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
@@ -242,7 +234,7 @@ err:
ret = 0;
if (ret < 0 && !bch2_err_matches(ret, EROFS))
- bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "from bch2_move_data()");
moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
trace_and_count(c, copygc, c, moved, 0, 0, 0);
@@ -308,25 +300,24 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
static int bch2_copygc_thread(void *arg)
{
struct bch_fs *c = arg;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];
- struct buckets_in_flight move_buckets;
+ struct buckets_in_flight buckets;
u64 last, wait;
int ret = 0;
- memset(&move_buckets, 0, sizeof(move_buckets));
+ memset(&buckets, 0, sizeof(buckets));
- ret = rhashtable_init(&move_buckets.table, &bch_move_bucket_params);
+ ret = rhashtable_init(&buckets.table, &bch_move_bucket_params);
if (ret) {
- bch_err(c, "error allocating copygc buckets in flight: %s",
- bch2_err_str(ret));
+ bch_err_msg(c, ret, "allocating copygc buckets in flight");
return ret;
}
set_freezable();
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
@@ -334,16 +325,16 @@ static int bch2_copygc_thread(void *arg)
false);
while (!ret && !kthread_should_stop()) {
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
cond_resched();
if (!c->copy_gc_enabled) {
- move_buckets_wait(&trans, &ctxt, &move_buckets, true);
+ move_buckets_wait(trans, &ctxt, &buckets, true);
kthread_wait_freezable(c->copy_gc_enabled);
}
if (unlikely(freezing(current))) {
- move_buckets_wait(&trans, &ctxt, &move_buckets, true);
+ move_buckets_wait(trans, &ctxt, &buckets, true);
__refrigerator(false);
continue;
}
@@ -354,7 +345,7 @@ static int bch2_copygc_thread(void *arg)
if (wait > clock->max_slop) {
c->copygc_wait_at = last;
c->copygc_wait = last + wait;
- move_buckets_wait(&trans, &ctxt, &move_buckets, true);
+ move_buckets_wait(trans, &ctxt, &buckets, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);
@@ -364,15 +355,15 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;
c->copygc_running = true;
- ret = bch2_copygc(&trans, &ctxt, &move_buckets);
+ ret = bch2_copygc(trans, &ctxt, &buckets);
c->copygc_running = false;
wake_up(&c->copygc_running_wq);
}
- move_buckets_wait(&trans, &ctxt, &move_buckets, true);
- rhashtable_destroy(&move_buckets.table);
- bch2_trans_exit(&trans);
+ move_buckets_wait(trans, &ctxt, &buckets, true);
+ rhashtable_destroy(&buckets.table);
+ bch2_trans_put(trans);
bch2_moving_ctxt_exit(&ctxt);
return 0;
@@ -404,7 +395,7 @@ int bch2_copygc_start(struct bch_fs *c)
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
ret = PTR_ERR_OR_ZERO(t);
if (ret) {
- bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "creating copygc thread");
return ret;
}
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 960bb24..739a2ef 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -471,8 +471,9 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
val = "0";
}
+ /* Unknown options are ignored: */
if (id < 0)
- goto bad_opt;
+ continue;
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt;
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 8a9db11..c21c258 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -469,7 +469,7 @@ struct bch_opts {
#undef x
};
-static const struct bch_opts bch2_opts_default = {
+static const __maybe_unused struct bch_opts bch2_opts_default = {
#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \
._name##_defined = true, \
._name = _default, \
diff --git a/libbcachefs/printbuf.c b/libbcachefs/printbuf.c
index c41daa1..de41f9a 100644
--- a/libbcachefs/printbuf.c
+++ b/libbcachefs/printbuf.c
@@ -81,8 +81,10 @@ void bch2_prt_printf(struct printbuf *out, const char *fmt, ...)
}
/**
- * printbuf_str - returns printbuf's buf as a C string, guaranteed to be null
- * terminated
+ * bch2_printbuf_str() - returns printbuf's buf as a C string, guaranteed to be
+ * null terminated
+ * @buf: printbuf to terminate
+ * Returns: Printbuf contents, as a nul terminated C string
*/
const char *bch2_printbuf_str(const struct printbuf *buf)
{
@@ -97,8 +99,9 @@ const char *bch2_printbuf_str(const struct printbuf *buf)
}
/**
- * printbuf_exit - exit a printbuf, freeing memory it owns and poisoning it
+ * bch2_printbuf_exit() - exit a printbuf, freeing memory it owns and poisoning it
* against accidental use.
+ * @buf: printbuf to exit
*/
void bch2_printbuf_exit(struct printbuf *buf)
{
@@ -120,7 +123,7 @@ void bch2_printbuf_tabstop_pop(struct printbuf *buf)
}
/*
- * printbuf_tabstop_set - add a tabstop, n spaces from the previous tabstop
+ * bch2_printbuf_tabstop_set() - add a tabstop, n spaces from the previous tabstop
*
* @buf: printbuf to control
* @spaces: number of spaces from previous tabpstop
@@ -144,7 +147,7 @@ int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
}
/**
- * printbuf_indent_add - add to the current indent level
+ * bch2_printbuf_indent_add() - add to the current indent level
*
* @buf: printbuf to control
* @spaces: number of spaces to add to the current indent level
@@ -164,7 +167,7 @@ void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces)
}
/**
- * printbuf_indent_sub - subtract from the current indent level
+ * bch2_printbuf_indent_sub() - subtract from the current indent level
*
* @buf: printbuf to control
* @spaces: number of spaces to subtract from the current indent level
@@ -227,9 +230,8 @@ static void __prt_tab(struct printbuf *out)
}
/**
- * prt_tab - Advance printbuf to the next tabstop
- *
- * @buf: printbuf to control
+ * bch2_prt_tab() - Advance printbuf to the next tabstop
+ * @out: printbuf to control
*
* Advance output to the next tabstop by printing spaces.
*/
@@ -267,7 +269,7 @@ static void __prt_tab_rjust(struct printbuf *buf)
}
/**
- * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
+ * bch2_prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
* previous output
*
* @buf: printbuf to control
@@ -284,11 +286,11 @@ void bch2_prt_tab_rjust(struct printbuf *buf)
}
/**
- * prt_bytes_indented - Print an array of chars, handling embedded control characters
+ * bch2_prt_bytes_indented() - Print an array of chars, handling embedded control characters
*
- * @out: printbuf to output to
- * @str: string to print
- * @count: number of bytes to print
+ * @out: output printbuf
+ * @str: string to print
+ * @count: number of bytes to print
*
* The following contol characters are handled as so:
* \n: prt_newline newline that obeys current indent level
@@ -335,32 +337,38 @@ void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned cou
}
/**
- * prt_human_readable_u64 - Print out a u64 in human readable units
+ * bch2_prt_human_readable_u64() - Print out a u64 in human readable units
+ * @out: output printbuf
+ * @v: integer to print
*
- * Units of 2^10 (default) or 10^3 are controlled via @buf->si_units
+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
*/
-void bch2_prt_human_readable_u64(struct printbuf *buf, u64 v)
+void bch2_prt_human_readable_u64(struct printbuf *out, u64 v)
{
- bch2_printbuf_make_room(buf, 10);
- buf->pos += string_get_size(v, 1, !buf->si_units,
- buf->buf + buf->pos,
- printbuf_remaining_size(buf));
+ bch2_printbuf_make_room(out, 10);
+ out->pos += string_get_size(v, 1, !out->si_units,
+ out->buf + out->pos,
+ printbuf_remaining_size(out));
}
/**
- * prt_human_readable_s64 - Print out a s64 in human readable units
+ * bch2_prt_human_readable_s64() - Print out a s64 in human readable units
+ * @out: output printbuf
+ * @v: integer to print
*
- * Units of 2^10 (default) or 10^3 are controlled via @buf->si_units
+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
*/
-void bch2_prt_human_readable_s64(struct printbuf *buf, s64 v)
+void bch2_prt_human_readable_s64(struct printbuf *out, s64 v)
{
if (v < 0)
- prt_char(buf, '-');
- bch2_prt_human_readable_u64(buf, abs(v));
+ prt_char(out, '-');
+ bch2_prt_human_readable_u64(out, abs(v));
}
/**
- * prt_units_u64 - Print out a u64 according to printbuf unit options
+ * bch2_prt_units_u64() - Print out a u64 according to printbuf unit options
+ * @out: output printbuf
+ * @v: integer to print
*
* Units are either raw (default), or human reabable units (controlled via
* @buf->human_readable_units)
@@ -374,7 +382,9 @@ void bch2_prt_units_u64(struct printbuf *out, u64 v)
}
/**
- * prt_units_s64 - Print out a s64 according to printbuf unit options
+ * bch2_prt_units_s64() - Print out a s64 according to printbuf unit options
+ * @out: output printbuf
+ * @v: integer to print
*
* Units are either raw (default), or human reabable units (controlled via
* @buf->human_readable_units)
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
index ca99772..36de2f0 100644
--- a/libbcachefs/quota.c
+++ b/libbcachefs/quota.c
@@ -572,7 +572,7 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans,
if (!s_t.master_subvol)
goto advance;
- ret = bch2_inode_find_by_inum_trans(trans,
+ ret = bch2_inode_find_by_inum_nowarn_trans(trans,
(subvol_inum) {
le32_to_cpu(s_t.master_subvol),
k.k->p.offset,
@@ -599,7 +599,7 @@ advance:
int bch2_fs_quota_read(struct bch_fs *c)
{
struct bch_sb_field_quota *sb_quota;
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
@@ -614,16 +614,16 @@ int bch2_fs_quota_read(struct bch_fs *c)
bch2_sb_quota_read(c);
mutex_unlock(&c->sb_lock);
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
- ret = for_each_btree_key2(&trans, iter, BTREE_ID_quotas,
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_quotas,
POS_MIN, BTREE_ITER_PREFETCH, k,
__bch2_quota_set(c, k, NULL)) ?:
- for_each_btree_key2(&trans, iter, BTREE_ID_inodes,
+ for_each_btree_key2(trans, iter, BTREE_ID_inodes,
POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
- bch2_fs_quota_read_inode(&trans, &iter, k));
+ bch2_fs_quota_read_inode(trans, &iter, k));
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@@ -786,7 +786,6 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
{
struct bch_fs *c = sb->s_fs_info;
struct bch_sb_field_quota *sb_quota;
- struct bch_memquota_type *q;
int ret = 0;
if (0) {
@@ -810,8 +809,6 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
return -EINVAL;
- q = &c->quotas[type];
-
mutex_lock(&c->sb_lock);
sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
if (!sb_quota) {
@@ -959,7 +956,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
+ bch2_set_quota_trans(trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq);
return bch2_err_class(ret);
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 15ce3ec..568f1e8 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -8,8 +8,6 @@
#include "compress.h"
#include "disk_groups.h"
#include "errcode.h"
-#include "extents.h"
-#include "io.h"
#include "move.h"
#include "rebalance.h"
#include "super-io.h"
@@ -350,7 +348,7 @@ int bch2_rebalance_start(struct bch_fs *c)
p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
ret = PTR_ERR_OR_ZERO(p);
if (ret) {
- bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "creating rebalance thread");
return ret;
}
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 30efb3c..1dceb7e 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -20,6 +20,7 @@
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "lru.h"
+#include "logged_ops.h"
#include "move.h"
#include "quota.h"
#include "recovery.h"
@@ -164,7 +165,7 @@ static int bch2_journal_replay(struct bch_fs *c)
(!k->allocated
? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim
: 0),
- bch2_journal_replay_key(&trans, k));
+ bch2_journal_replay_key(trans, k));
if (ret) {
bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret));
@@ -422,15 +423,9 @@ static int bch2_initialize_subvolumes(struct bch_fs *c)
root_volume.v.snapshot = cpu_to_le32(U32_MAX);
root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
- ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees,
- &root_tree.k_i,
- NULL, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_snapshots,
- &root_snapshot.k_i,
- NULL, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_subvolumes,
- &root_volume.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -471,7 +466,7 @@ noinline_for_stack
static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
{
int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
- __bch2_fs_upgrade_for_subvolumes(&trans));
+ __bch2_fs_upgrade_for_subvolumes(trans));
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -561,7 +556,7 @@ static void check_version_upgrade(struct bch_fs *c)
if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK)
prt_str(&buf, "fsck required");
else {
- prt_str(&buf, "running recovery passses: ");
+ prt_str(&buf, "running recovery passes: ");
prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
}
@@ -1009,9 +1004,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_pack(&packed_inode, &root_inode);
packed_inode.inode.k.p.snapshot = U32_MAX;
- ret = bch2_btree_insert(c, BTREE_ID_inodes,
- &packed_inode.inode.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0);
if (ret) {
bch_err_msg(c, ret, "creating root directory");
goto err;
@@ -1020,7 +1013,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_init_early(c, &lostfound_inode);
ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_create_trans(&trans,
+ bch2_create_trans(trans,
BCACHEFS_ROOT_SUBVOL_INUM,
&root_inode, &lostfound_inode,
&lostfound,
diff --git a/libbcachefs/recovery_types.h b/libbcachefs/recovery_types.h
index abf1f83..f3c9ea7 100644
--- a/libbcachefs/recovery_types.h
+++ b/libbcachefs/recovery_types.h
@@ -24,6 +24,7 @@
x(check_alloc_to_lru_refs, PASS_FSCK) \
x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 0) \
+ x(resume_logged_ops, PASS_ALWAYS) \
x(check_snapshot_trees, PASS_FSCK) \
x(check_snapshots, PASS_FSCK) \
x(check_subvols, PASS_FSCK) \
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 39f711d..d77d0ea 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -5,9 +5,11 @@
#include "buckets.h"
#include "extents.h"
#include "inode.h"
-#include "io.h"
+#include "io_misc.h"
+#include "io_write.h"
#include "reflink.h"
#include "subvolume.h"
+#include "super-io.h"
#include <linux/sched/signal.h>
@@ -89,6 +91,9 @@ void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
bch2_bkey_ptrs_to_text(out, c, k);
}
+#if 0
+Currently disabled, needs to be debugged:
+
bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
{
struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l);
@@ -96,6 +101,7 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r);
}
+#endif
int bch2_trans_mark_reflink_v(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
@@ -247,7 +253,7 @@ s64 bch2_remap_range(struct bch_fs *c,
u64 remap_sectors,
u64 new_i_size, s64 *i_sectors_delta)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter dst_iter, src_iter;
struct bkey_s_c src_k;
struct bkey_buf new_dst, new_src;
@@ -269,11 +275,11 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_init(&new_dst);
bch2_bkey_buf_init(&new_src);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
+ trans = bch2_trans_get(c);
- bch2_trans_iter_init(&trans, &src_iter, BTREE_ID_extents, src_start,
+ bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start,
BTREE_ITER_INTENT);
- bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start,
+ bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start,
BTREE_ITER_INTENT);
while ((ret == 0 ||
@@ -281,21 +287,21 @@ s64 bch2_remap_range(struct bch_fs *c,
bkey_lt(dst_iter.pos, dst_end)) {
struct disk_reservation disk_res = { 0 };
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
if (fatal_signal_pending(current)) {
ret = -EINTR;
break;
}
- ret = bch2_subvolume_get_snapshot(&trans, src_inum.subvol,
+ ret = bch2_subvolume_get_snapshot(trans, src_inum.subvol,
&src_snapshot);
if (ret)
continue;
bch2_btree_iter_set_snapshot(&src_iter, src_snapshot);
- ret = bch2_subvolume_get_snapshot(&trans, dst_inum.subvol,
+ ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol,
&dst_snapshot);
if (ret)
continue;
@@ -312,7 +318,7 @@ s64 bch2_remap_range(struct bch_fs *c,
continue;
if (bkey_lt(src_want, src_iter.pos)) {
- ret = bch2_fpunch_at(&trans, &dst_iter, dst_inum,
+ ret = bch2_fpunch_at(trans, &dst_iter, dst_inum,
min(dst_end.offset,
dst_iter.pos.offset +
src_iter.pos.offset - src_want.offset),
@@ -326,7 +332,7 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_reassemble(&new_src, c, src_k);
src_k = bkey_i_to_s_c(new_src.k);
- ret = bch2_make_extent_indirect(&trans, &src_iter,
+ ret = bch2_make_extent_indirect(trans, &src_iter,
new_src.k);
if (ret)
continue;
@@ -354,14 +360,14 @@ s64 bch2_remap_range(struct bch_fs *c,
min(src_k.k->p.offset - src_want.offset,
dst_end.offset - dst_iter.pos.offset));
- ret = bch2_extent_update(&trans, dst_inum, &dst_iter,
+ ret = bch2_extent_update(trans, dst_inum, &dst_iter,
new_dst.k, &disk_res,
new_i_size, i_sectors_delta,
true);
bch2_disk_reservation_put(c, &disk_res);
}
- bch2_trans_iter_exit(&trans, &dst_iter);
- bch2_trans_iter_exit(&trans, &src_iter);
+ bch2_trans_iter_exit(trans, &dst_iter);
+ bch2_trans_iter_exit(trans, &src_iter);
BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end));
BUG_ON(bkey_gt(dst_iter.pos, dst_end));
@@ -373,23 +379,23 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bch_inode_unpacked inode_u;
struct btree_iter inode_iter = { NULL };
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u,
+ ret2 = bch2_inode_peek(trans, &inode_iter, &inode_u,
dst_inum, BTREE_ITER_INTENT);
if (!ret2 &&
inode_u.bi_size < new_i_size) {
inode_u.bi_size = new_i_size;
- ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
- bch2_trans_commit(&trans, NULL, NULL,
+ ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
}
- bch2_trans_iter_exit(&trans, &inode_iter);
+ bch2_trans_iter_exit(trans, &inode_iter);
} while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 5b591c5..dbef41c 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -429,7 +429,7 @@ out:
return ret;
err:
- bch_err(c, "error adding replicas entry: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "adding replicas entry");
goto out;
}
diff --git a/libbcachefs/six.c b/libbcachefs/six.c
index 14cffa6..458a1de 100644
--- a/libbcachefs/six.c
+++ b/libbcachefs/six.c
@@ -31,7 +31,6 @@ static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
#define SIX_LOCK_HELD_intent (1U << 26)
#define SIX_LOCK_HELD_write (1U << 27)
#define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read))
-#define SIX_LOCK_WAITING_intent (1U << (28 + SIX_LOCK_intent))
#define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN (1U << 31)
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 03ae280..cdf9eda 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -163,8 +163,7 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
rcu_assign_pointer(c->snapshots, new);
c->snapshot_table_size = new_size;
- if (old)
- kvfree_rcu(old);
+ kvfree_rcu_mightsleep(old);
return &rcu_dereference_protected(c->snapshots, true)->s[idx];
}
@@ -344,7 +343,7 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
BTREE_ITER_WITH_UPDATES, snapshot, s);
}
-int bch2_snapshot_live(struct btree_trans *trans, u32 id)
+static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
{
struct bch_snapshot v;
int ret;
@@ -371,7 +370,7 @@ int bch2_snapshot_live(struct btree_trans *trans, u32 id)
* it's part of such a linear chain: this correctly sets equivalence classes on
* startup if we run leaf to root (i.e. in natural key order).
*/
-int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
+static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
unsigned i, nr_live = 0, live_idx = 0;
@@ -488,18 +487,18 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
bch2_trans_iter_exit(trans, &iter);
if (!ret && !found) {
- struct bkey_i_subvolume *s;
+ struct bkey_i_subvolume *u;
*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
- s = bch2_bkey_get_mut_typed(trans, &iter,
+ u = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_subvolumes, POS(0, *subvol_id),
0, subvolume);
- ret = PTR_ERR_OR_ZERO(s);
+ ret = PTR_ERR_OR_ZERO(u);
if (ret)
return ret;
- SET_BCH_SUBVOLUME_SNAP(&s->v, false);
+ SET_BCH_SUBVOLUME_SNAP(&u->v, false);
}
return ret;
@@ -591,11 +590,11 @@ int bch2_check_snapshot_trees(struct bch_fs *c)
int ret;
ret = bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter,
+ for_each_btree_key_commit(trans, iter,
BTREE_ID_snapshot_trees, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_snapshot_tree(&trans, &iter, k)));
+ check_snapshot_tree(trans, &iter, k)));
if (ret)
bch_err(c, "error %i checking snapshot trees", ret);
@@ -864,11 +863,11 @@ int bch2_check_snapshots(struct bch_fs *c)
* the parent's depth already be correct:
*/
ret = bch2_trans_run(c,
- for_each_btree_key_reverse_commit(&trans, iter,
+ for_each_btree_key_reverse_commit(trans, iter,
BTREE_ID_snapshots, POS_MAX,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_snapshot(&trans, &iter, k)));
+ check_snapshot(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -911,7 +910,7 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
swap(s->children[0], s->children[1]);
}
-int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
+static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
@@ -1072,6 +1071,10 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
goto err;
new_snapids[i] = iter.pos.offset;
+
+ mutex_lock(&c->snapshot_table_lock);
+ snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
+ mutex_unlock(&c->snapshot_table_lock);
}
err:
bch2_trans_iter_exit(trans, &iter);
@@ -1354,7 +1357,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
int bch2_delete_dead_snapshots(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_snapshot snap;
@@ -1366,35 +1369,35 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!test_bit(BCH_FS_STARTED, &c->flags)) {
ret = bch2_fs_read_write_early(c);
if (ret) {
- bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
return ret;
}
}
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
/*
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
NULL, NULL, 0,
- bch2_delete_redundant_snapshot(&trans, &iter, k));
+ bch2_delete_redundant_snapshot(trans, &iter, k));
if (ret) {
- bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "deleting redundant snapshots");
goto err;
}
- for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k,
- bch2_snapshot_set_equiv(&trans, k));
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k,
+ bch2_snapshot_set_equiv(trans, k));
if (ret) {
- bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
goto err;
}
- for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
+ for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ret) {
if (k.k->type != KEY_TYPE_snapshot)
continue;
@@ -1406,7 +1409,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
break;
}
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (ret) {
bch_err_msg(c, ret, "walking snapshots");
@@ -1421,16 +1424,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!btree_type_has_snapshots(id))
continue;
- ret = for_each_btree_key_commit(&trans, iter,
+ ret = for_each_btree_key_commit(trans, iter,
id, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
&res, NULL, BTREE_INSERT_NOFAIL,
- snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
- for_each_btree_key_commit(&trans, iter,
+ snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
+ for_each_btree_key_commit(trans, iter,
id, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
&res, NULL, BTREE_INSERT_NOFAIL,
- move_key_to_correct_snapshot(&trans, &iter, k));
+ move_key_to_correct_snapshot(trans, &iter, k));
bch2_disk_reservation_put(c, &res);
darray_exit(&equiv_seen);
@@ -1441,7 +1444,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
}
}
- for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
+ for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ret) {
u32 snapshot = k.k->p.offset;
u32 equiv = bch2_snapshot_equiv(c, snapshot);
@@ -1449,23 +1452,23 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (equiv != snapshot)
snapshot_list_add(c, &deleted_interior, snapshot);
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
/*
* Fixing children of deleted snapshots can't be done completely
* atomically, if we crash between here and when we delete the interior
* nodes some depth fields will be off:
*/
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, POS_MIN,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_INTENT, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
- bch2_fix_child_of_deleted_snapshot(&trans, &iter, k, &deleted_interior));
+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
if (ret)
goto err;
darray_for_each(deleted, i) {
- ret = commit_do(&trans, NULL, NULL, 0,
- bch2_snapshot_node_delete(&trans, *i));
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_delete(trans, *i));
if (ret) {
bch_err_msg(c, ret, "deleting snapshot %u", *i);
goto err;
@@ -1473,8 +1476,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
}
darray_for_each(deleted_interior, i) {
- ret = commit_do(&trans, NULL, NULL, 0,
- bch2_snapshot_node_delete(&trans, *i));
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_delete(trans, *i));
if (ret) {
bch_err_msg(c, ret, "deleting snapshot %u", *i);
goto err;
@@ -1485,7 +1488,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
err:
darray_exit(&deleted_interior);
darray_exit(&deleted);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -1618,7 +1621,8 @@ int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bkey_buf sk;
- int ret;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
@@ -1640,7 +1644,8 @@ int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
}
bch2_bkey_buf_exit(&sk, c);
- return ret;
+
+ return ret ?: trans_was_restarted(trans, restart_count);
}
int bch2_snapshots_read(struct bch_fs *c)
@@ -1650,11 +1655,11 @@ int bch2_snapshots_read(struct bch_fs *c)
int ret = 0;
ret = bch2_trans_run(c,
- for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
+ for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
- bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
- bch2_snapshot_set_equiv(&trans, k)) ?:
- for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
+ bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
+ bch2_snapshot_set_equiv(trans, k)) ?:
+ for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
if (ret)
diff --git a/libbcachefs/snapshot.h b/libbcachefs/snapshot.h
index dabc9b9..de215d9 100644
--- a/libbcachefs/snapshot.h
+++ b/libbcachefs/snapshot.h
@@ -235,8 +235,6 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s);
int bch2_snapshot_get_subvol(struct btree_trans *, u32,
struct bch_subvolume *);
-int bch2_snapshot_live(struct btree_trans *trans, u32 id);
-int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k);
/* only exported for tests: */
int bch2_snapshot_node_create(struct btree_trans *, u32,
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 0214a98..caf2dd7 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -41,8 +41,7 @@ static int check_subvol(struct btree_trans *trans,
ret = bch2_subvolume_delete(trans, iter->pos.offset);
if (ret)
- bch_err(c, "error deleting subvolume %llu: %s",
- iter->pos.offset, bch2_err_str(ret));
+ bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
return ret ?: -BCH_ERR_transaction_restart_nested;
}
@@ -87,10 +86,10 @@ int bch2_check_subvols(struct bch_fs *c)
int ret;
ret = bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter,
+ for_each_btree_key_commit(trans, iter,
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_subvol(&trans, &iter, k)));
+ check_subvol(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -99,7 +98,7 @@ int bch2_check_subvols(struct bch_fs *c)
/* Subvolumes: */
int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
- unsigned flags, struct printbuf *err)
+ enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
@@ -294,9 +293,9 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
bch2_evict_subvolume_inodes(c, &s);
for (id = s.data; id < s.data + s.nr; id++) {
- ret = bch2_trans_run(c, bch2_subvolume_delete(&trans, *id));
+ ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
if (ret) {
- bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
+ bch_err_msg(c, ret, "deleting subvolume %u", *id);
break;
}
}
diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h
index 8d4c50f..bb14f92 100644
--- a/libbcachefs/subvolume.h
+++ b/libbcachefs/subvolume.h
@@ -10,7 +10,7 @@ enum bkey_invalid_flags;
int bch2_check_subvols(struct bch_fs *);
int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c,
- unsigned, struct printbuf *);
+ enum bkey_invalid_flags, struct printbuf *);
void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index b6021b7..c9bf342 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -6,7 +6,6 @@
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
-#include "io.h"
#include "journal.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
@@ -23,6 +22,9 @@
#include <linux/backing-dev.h>
#include <linux/sort.h>
+static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
+};
+
struct bch2_metadata_version {
u16 version;
const char *name;
@@ -161,7 +163,8 @@ void bch2_free_super(struct bch_sb_handle *sb)
{
kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->bdev))
- blkdev_put(sb->bdev, sb->mode);
+ blkdev_put(sb->bdev, sb->holder);
+ kfree(sb->holder);
kfree(sb->sb);
memset(sb, 0, sizeof(*sb));
@@ -182,7 +185,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
if (sb->sb && sb->buffer_size >= new_buffer_size)
return 0;
- if (sb->have_layout) {
+ if (sb->sb && sb->have_layout) {
u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
if (new_bytes > max_bytes) {
@@ -243,9 +246,9 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
/* XXX: we're not checking that offline device have enough space */
for_each_online_member(ca, c, i) {
- struct bch_sb_handle *sb = &ca->disk_sb;
+ struct bch_sb_handle *dev_sb = &ca->disk_sb;
- if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
+ if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
percpu_ref_put(&ca->ref);
return NULL;
}
@@ -381,7 +384,7 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
}
if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) {
- prt_printf(out, "Bad intenal UUID (got zeroes)");
+ prt_printf(out, "Bad internal UUID (got zeroes)");
return -BCH_ERR_invalid_sb_uuid;
}
@@ -664,27 +667,30 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
retry:
#endif
memset(sb, 0, sizeof(*sb));
- sb->mode = FMODE_READ;
+ sb->mode = BLK_OPEN_READ;
sb->have_bio = true;
+ sb->holder = kmalloc(1, GFP_KERNEL);
+ if (!sb->holder)
+ return -ENOMEM;
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
- sb->mode |= FMODE_BUFFERED;
+ sb->mode |= BLK_OPEN_BUFFERED;
#endif
if (!opt_get(*opts, noexcl))
- sb->mode |= FMODE_EXCL;
+ sb->mode |= BLK_OPEN_EXCL;
if (!opt_get(*opts, nochanges))
- sb->mode |= FMODE_WRITE;
+ sb->mode |= BLK_OPEN_WRITE;
- sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (IS_ERR(sb->bdev) &&
PTR_ERR(sb->bdev) == -EACCES &&
opt_get(*opts, read_only)) {
- sb->mode &= ~FMODE_WRITE;
+ sb->mode &= ~BLK_OPEN_WRITE;
- sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (!IS_ERR(sb->bdev))
opt_set(*opts, nochanges, true);
}
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index e7dbc31..e94a63a 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -35,7 +35,8 @@
#include "fs-io-direct.h"
#include "fsck.h"
#include "inode.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
@@ -68,6 +69,7 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
+MODULE_DESCRIPTION("bcachefs filesystem");
#define KTYPE(type) \
static const struct attribute_group type ## _group = { \
@@ -421,6 +423,10 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
return ret;
}
+ ret = bch2_journal_reclaim_start(&c->journal);
+ if (ret)
+ goto err;
+
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
@@ -430,7 +436,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_reinit(&c->writes);
#else
- for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) {
+ for (i = 0; i < BCH_WRITE_REF_NR; i++) {
BUG_ON(atomic_long_read(&c->writes[i]));
atomic_long_inc(&c->writes[i]);
}
@@ -465,7 +471,6 @@ int bch2_fs_read_write_early(struct bch_fs *c)
static void __bch2_fs_free(struct bch_fs *c)
{
unsigned i;
- int cpu;
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
@@ -479,7 +484,8 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_fs_fsio_exit(c);
bch2_fs_ec_exit(c);
bch2_fs_encryption_exit(c);
- bch2_fs_io_exit(c);
+ bch2_fs_io_write_exit(c);
+ bch2_fs_io_read_exit(c);
bch2_fs_buckets_waiting_for_journal_exit(c);
bch2_fs_btree_interior_update_exit(c);
bch2_fs_btree_iter_exit(c);
@@ -496,12 +502,7 @@ static void __bch2_fs_free(struct bch_fs *c)
percpu_free_rwsem(&c->mark_lock);
free_percpu(c->online_reserved);
- if (c->btree_paths_bufs)
- for_each_possible_cpu(cpu)
- kfree(per_cpu_ptr(c->btree_paths_bufs, cpu)->path);
-
darray_exit(&c->btree_roots_extra);
- free_percpu(c->btree_paths_bufs);
free_percpu(c->pcpu);
mempool_exit(&c->large_bkey_pool);
mempool_exit(&c->btree_bounce_pool);
@@ -581,8 +582,6 @@ void bch2_fs_free(struct bch_fs *c)
{
unsigned i;
- BUG_ON(!test_bit(BCH_FS_STOPPING, &c->flags));
-
mutex_lock(&bch_fs_list_lock);
list_del(&c->list);
mutex_unlock(&bch_fs_list_lock);
@@ -787,6 +786,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
if (c->opts.inodes_use_key_cache)
c->btree_key_cache_btrees |= 1U << BTREE_ID_inodes;
+ c->btree_key_cache_btrees |= 1U << BTREE_ID_logged_ops;
c->block_bits = ilog2(block_sectors(c));
c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c);
@@ -824,7 +824,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
BIOSET_NEED_BVECS) ||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
!(c->online_reserved = alloc_percpu(u64)) ||
- !(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) ||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
@@ -846,13 +845,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
bch2_fs_subvolumes_init(c) ?:
- bch2_fs_io_init(c) ?:
+ bch2_fs_io_read_init(c) ?:
+ bch2_fs_io_write_init(c) ?:
bch2_fs_nocow_locking_init(c) ?:
bch2_fs_encryption_init(c) ?:
bch2_fs_compress_init(c) ?:
bch2_fs_ec_init(c) ?:
bch2_fs_fsio_init(c) ?:
- bch2_fs_fs_io_buffered_init(c);
+ bch2_fs_fs_io_buffered_init(c) ?:
bch2_fs_fs_io_direct_init(c);
if (ret)
goto err;
@@ -990,7 +990,7 @@ out:
up_write(&c->state_lock);
return ret;
err:
- bch_err(c, "error starting filesystem: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "starting filesystem");
goto out;
}
@@ -1237,8 +1237,6 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
/* Commit: */
ca->disk_sb = *sb;
- if (sb->mode & FMODE_EXCL)
- ca->disk_sb.bdev->bd_holder = ca;
memset(sb, 0, sizeof(*sb));
ca->dev = ca->disk_sb.bdev->bd_dev;
@@ -1457,7 +1455,7 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
BTREE_TRIGGER_NORUN, NULL);
if (ret)
- bch_err(c, "error removing dev alloc info: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "removing dev alloc info");
return ret;
}
@@ -1486,31 +1484,31 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
if (ret) {
- bch_err(ca, "Remove failed: error dropping data: %s", bch2_err_str(ret));
+ bch_err_msg(ca, ret, "dropping data");
goto err;
}
ret = bch2_dev_remove_alloc(c, ca);
if (ret) {
- bch_err(ca, "Remove failed, error deleting alloc info");
+ bch_err_msg(ca, ret, "deleting alloc info");
goto err;
}
ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
if (ret) {
- bch_err(ca, "Remove failed: error flushing journal: %s", bch2_err_str(ret));
+ bch_err_msg(ca, ret, "flushing journal");
goto err;
}
ret = bch2_journal_flush(&c->journal);
if (ret) {
- bch_err(ca, "Remove failed, journal error");
+ bch_err(ca, "journal error");
goto err;
}
ret = bch2_replicas_gc2(c);
if (ret) {
- bch_err(ca, "Remove failed: error from replicas gc: %s", bch2_err_str(ret));
+ bch_err_msg(ca, ret, "in replicas_gc2()");
goto err;
}
@@ -1585,7 +1583,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
ret = bch2_read_super(path, &opts, &sb);
if (ret) {
- bch_err(c, "device add error: error reading super: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "reading super");
goto err;
}
@@ -1601,13 +1599,12 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
ret = bch2_dev_may_add(sb.sb, c);
if (ret) {
- bch_err(c, "device add error: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
goto err;
}
ca = __bch2_dev_alloc(c, &dev_mi);
if (!ca) {
- bch2_free_super(&sb);
ret = -ENOMEM;
goto err;
}
@@ -1615,14 +1612,12 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
bch2_dev_usage_init(ca);
ret = __bch2_dev_attach_bdev(ca, &sb);
- if (ret) {
- bch2_dev_free(ca);
+ if (ret)
goto err;
- }
ret = bch2_dev_journal_alloc(ca);
if (ret) {
- bch_err(c, "device add error: journal alloc failed");
+ bch_err_msg(c, ret, "allocating journal");
goto err;
}
@@ -1631,7 +1626,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
ret = bch2_sb_from_fs(c, ca);
if (ret) {
- bch_err(c, "device add error: new device superblock too small");
+ bch_err_msg(c, ret, "setting up new superblock");
goto err_unlock;
}
@@ -1640,8 +1635,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (!bch2_sb_resize_members(&ca->disk_sb,
le32_to_cpu(mi->field.u64s) +
sizeof(dev_mi) / sizeof(u64))) {
- bch_err(c, "device add error: new device superblock too small");
ret = -BCH_ERR_ENOSPC_sb_members;
+ bch_err_msg(c, ret, "setting up new superblock");
goto err_unlock;
}
@@ -1653,8 +1648,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx))
goto have_slot;
no_slot:
- bch_err(c, "device add error: already have maximum number of devices");
ret = -BCH_ERR_ENOSPC_sb_members;
+ bch_err_msg(c, ret, "setting up new superblock");
goto err_unlock;
have_slot:
@@ -1664,8 +1659,8 @@ have_slot:
mi = bch2_sb_resize_members(&c->disk_sb, u64s);
if (!mi) {
- bch_err(c, "device add error: no room in superblock for member info");
ret = -BCH_ERR_ENOSPC_sb_members;
+ bch_err_msg(c, ret, "setting up new superblock");
goto err_unlock;
}
@@ -1681,7 +1676,7 @@ have_slot:
if (BCH_MEMBER_GROUP(&dev_mi)) {
ret = __bch2_dev_group_set(c, ca, label.buf);
if (ret) {
- bch_err(c, "device add error: error setting label");
+ bch_err_msg(c, ret, "creating new label");
goto err_unlock;
}
}
@@ -1693,13 +1688,13 @@ have_slot:
ret = bch2_trans_mark_dev_sb(c, ca);
if (ret) {
- bch_err(c, "device add error: error marking new superblock: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "marking new superblock");
goto err_late;
}
ret = bch2_fs_freespace_init(c);
if (ret) {
- bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "initializing free space");
goto err_late;
}
@@ -1749,7 +1744,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb);
if (ret) {
- bch_err(c, "error bringing %s online: %s", path, bch2_err_str(ret));
+ bch_err_msg(c, ret, "bringing %s online", path);
goto err;
}
@@ -1761,8 +1756,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
ret = bch2_trans_mark_dev_sb(c, ca);
if (ret) {
- bch_err(c, "error bringing %s online: error from bch2_trans_mark_dev_sb: %s",
- path, bch2_err_str(ret));
+ bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path);
goto err;
}
@@ -1780,7 +1774,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
ret = bch2_fs_freespace_init(c);
if (ret)
- bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret));
+ bch_err_msg(c, ret, "initializing free space");
up_write(&c->state_lock);
return 0;
@@ -1835,7 +1829,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
ret = bch2_dev_buckets_resize(c, ca, nbuckets);
if (ret) {
- bch_err(ca, "Resize error: %s", bch2_err_str(ret));
+ bch_err_msg(ca, ret, "resizing buckets");
goto err;
}
diff --git a/libbcachefs/super_types.h b/libbcachefs/super_types.h
index 89419fc..597a8db 100644
--- a/libbcachefs/super_types.h
+++ b/libbcachefs/super_types.h
@@ -6,8 +6,9 @@ struct bch_sb_handle {
struct bch_sb *sb;
struct block_device *bdev;
struct bio *bio;
+ void *holder;
size_t buffer_size;
- fmode_t mode;
+ blk_mode_t mode;
unsigned have_layout:1;
unsigned have_bio:1;
unsigned fs_sb:1;
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 941f4bc..1abc61c 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -113,10 +113,6 @@ do { \
prt_human_readable_s64(out, val); \
} while (0)
-#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var))
-#define var_print(_var) sysfs_print(_var, var(_var))
-#define var_hprint(_var) sysfs_hprint(_var, var(_var))
-
#define sysfs_strtoul(file, var) \
do { \
if (attr == &sysfs_ ## file) \
@@ -139,30 +135,6 @@ do { \
_v; \
})
-#define strtoul_restrict_or_return(cp, min, max) \
-({ \
- unsigned long __v = 0; \
- int _r = strtoul_safe_restrict(cp, __v, min, max); \
- if (_r) \
- return _r; \
- __v; \
-})
-
-#define strtoi_h_or_return(cp) \
-({ \
- u64 _v; \
- int _r = strtoi_h(cp, &_v); \
- if (_r) \
- return _r; \
- _v; \
-})
-
-#define sysfs_hatoi(file, var) \
-do { \
- if (attr == &sysfs_ ## file) \
- return strtoi_h(buf, &var) ?: (ssize_t) size; \
-} while (0)
-
write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
@@ -280,7 +252,7 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
enum btree_id id;
@@ -291,18 +263,18 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
incompressible_sectors = 0,
compressed_sectors_compressed = 0,
compressed_sectors_uncompressed = 0;
- int ret;
+ int ret = 0;
if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
for (id = 0; id < BTREE_ID_NR; id++) {
if (!btree_type_has_ptrs(id))
continue;
- for_each_btree_key(&trans, iter, id, POS_MIN,
+ for_each_btree_key(trans, iter, id, POS_MIN,
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -336,10 +308,10 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
else if (compressed)
nr_compressed_extents++;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
return ret;
@@ -1005,7 +977,7 @@ STORE(bch2_dev)
mutex_lock(&c->sb_lock);
mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
- if (v != BCH_MEMBER_DURABILITY(mi)) {
+ if (v + 1 != BCH_MEMBER_DURABILITY(mi)) {
SET_BCH_MEMBER_DURABILITY(mi, v + 1);
bch2_write_super(c);
}
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index 72389c7..c907b3e 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c
@@ -31,7 +31,7 @@ static void delete_test_keys(struct bch_fs *c)
static int test_delete(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_i_cookie k;
int ret;
@@ -39,44 +39,43 @@ static int test_delete(struct bch_fs *c, u64 nr)
bkey_cookie_init(&k.k_i);
k.k.p.snapshot = U32_MAX;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p,
BTREE_ITER_INTENT);
- ret = commit_do(&trans, NULL, NULL, 0,
+ ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
- bch2_trans_update(&trans, &iter, &k.k_i, 0));
+ bch2_trans_update(trans, &iter, &k.k_i, 0));
if (ret) {
bch_err_msg(c, ret, "update error");
goto err;
}
pr_info("deleting once");
- ret = commit_do(&trans, NULL, NULL, 0,
+ ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
- bch2_btree_delete_at(&trans, &iter, 0));
+ bch2_btree_delete_at(trans, &iter, 0));
if (ret) {
bch_err_msg(c, ret, "delete error (first)");
goto err;
}
pr_info("deleting twice");
- ret = commit_do(&trans, NULL, NULL, 0,
+ ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
- bch2_btree_delete_at(&trans, &iter, 0));
+ bch2_btree_delete_at(trans, &iter, 0));
if (ret) {
bch_err_msg(c, ret, "delete error (second)");
goto err;
}
err:
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
static int test_delete_written(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_i_cookie k;
int ret;
@@ -84,58 +83,53 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
bkey_cookie_init(&k.k_i);
k.k.p.snapshot = U32_MAX;
- bch2_trans_init(&trans, c, 0, 0);
-
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p,
BTREE_ITER_INTENT);
- ret = commit_do(&trans, NULL, NULL, 0,
+ ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
- bch2_trans_update(&trans, &iter, &k.k_i, 0));
+ bch2_trans_update(trans, &iter, &k.k_i, 0));
if (ret) {
bch_err_msg(c, ret, "update error");
goto err;
}
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
bch2_journal_flush_all_pins(&c->journal);
- ret = commit_do(&trans, NULL, NULL, 0,
+ ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
- bch2_btree_delete_at(&trans, &iter, 0));
+ bch2_btree_delete_at(trans, &iter, 0));
if (ret) {
bch_err_msg(c, ret, "delete error");
goto err;
}
err:
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
static int test_iterate(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_s_c k;
u64 i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
delete_test_keys(c);
pr_info("inserting test keys");
for (i = 0; i < nr; i++) {
- struct bkey_i_cookie k;
+ struct bkey_i_cookie ck;
- bkey_cookie_init(&k.k_i);
- k.k.p.offset = i;
- k.k.p.snapshot = U32_MAX;
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i;
+ ck.k.p.snapshot = U32_MAX;
- ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0);
if (ret) {
bch_err_msg(c, ret, "insert error");
goto err;
@@ -146,7 +140,7 @@ static int test_iterate(struct bch_fs *c, u64 nr)
i = 0;
- ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(k.k->p.offset != i++);
@@ -161,7 +155,7 @@ static int test_iterate(struct bch_fs *c, u64 nr)
pr_info("iterating backwards");
- ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_xattrs,
+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs,
SPOS(0, U64_MAX, U32_MAX), 0, k,
({
BUG_ON(k.k->p.offset != --i);
@@ -174,35 +168,32 @@ static int test_iterate(struct bch_fs *c, u64 nr)
BUG_ON(i);
err:
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
static int test_iterate_extents(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_s_c k;
u64 i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
delete_test_keys(c);
pr_info("inserting test extents");
for (i = 0; i < nr; i += 8) {
- struct bkey_i_cookie k;
+ struct bkey_i_cookie ck;
- bkey_cookie_init(&k.k_i);
- k.k.p.offset = i + 8;
- k.k.p.snapshot = U32_MAX;
- k.k.size = 8;
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i + 8;
+ ck.k.p.snapshot = U32_MAX;
+ ck.k.size = 8;
- ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0);
if (ret) {
bch_err_msg(c, ret, "insert error");
goto err;
@@ -213,7 +204,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
i = 0;
- ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents,
+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(bkey_start_offset(k.k) != i);
@@ -229,7 +220,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
pr_info("iterating backwards");
- ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_extents,
+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents,
SPOS(0, U64_MAX, U32_MAX), 0, k,
({
BUG_ON(k.k->p.offset != i);
@@ -243,34 +234,31 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
BUG_ON(i);
err:
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
static int test_iterate_slots(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_s_c k;
u64 i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
delete_test_keys(c);
pr_info("inserting test keys");
for (i = 0; i < nr; i++) {
- struct bkey_i_cookie k;
+ struct bkey_i_cookie ck;
- bkey_cookie_init(&k.k_i);
- k.k.p.offset = i * 2;
- k.k.p.snapshot = U32_MAX;
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i * 2;
+ ck.k.p.snapshot = U32_MAX;
- ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0);
if (ret) {
bch_err_msg(c, ret, "insert error");
goto err;
@@ -281,7 +269,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0;
- ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(k.k->p.offset != i);
@@ -299,7 +287,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0;
- ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
BTREE_ITER_SLOTS, k, ({
if (i >= nr * 2)
@@ -317,34 +305,31 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
}
ret = 0;
err:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_s_c k;
u64 i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
delete_test_keys(c);
pr_info("inserting test keys");
for (i = 0; i < nr; i += 16) {
- struct bkey_i_cookie k;
+ struct bkey_i_cookie ck;
- bkey_cookie_init(&k.k_i);
- k.k.p.offset = i + 16;
- k.k.p.snapshot = U32_MAX;
- k.k.size = 8;
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i + 16;
+ ck.k.p.snapshot = U32_MAX;
+ ck.k.size = 8;
- ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0);
if (ret) {
bch_err_msg(c, ret, "insert error");
goto err;
@@ -355,7 +340,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0;
- ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents,
+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k, ({
BUG_ON(bkey_start_offset(k.k) != i + 8);
@@ -374,7 +359,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0;
- ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents,
+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
BTREE_ITER_SLOTS, k, ({
if (i == nr)
@@ -392,7 +377,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
}
ret = 0;
err:
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return 0;
}
@@ -402,43 +387,41 @@ err:
*/
static int test_peek_end(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
- lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
- lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return 0;
}
static int test_peek_end_extents(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), 0);
- lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
- lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k);
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return 0;
}
@@ -458,8 +441,7 @@ static int insert_test_extent(struct bch_fs *c,
k.k_i.k.size = end - start;
k.k_i.k.version.lo = test_version++;
- ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0);
if (ret)
bch_err_fn(c, ret);
return ret;
@@ -515,7 +497,7 @@ static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start,
k.k_i.k.size = len;
ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_btree_insert_nonextent(&trans, BTREE_ID_extents, &k.k_i,
+ bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
if (ret)
bch_err_fn(c, ret);
@@ -538,7 +520,7 @@ static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
/* Test skipping over keys in unrelated snapshots: */
static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_i_cookie cookie;
@@ -546,20 +528,19 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
bkey_cookie_init(&cookie.k_i);
cookie.k.p.snapshot = snapid_hi;
- ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0);
if (ret)
return ret;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
+ trans = bch2_trans_get(c);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, snapid_lo), 0);
- lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
BUG_ON(k.k->p.snapshot != U32_MAX);
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
@@ -572,13 +553,12 @@ static int test_snapshots(struct bch_fs *c, u64 nr)
bkey_cookie_init(&cookie.k_i);
cookie.k.p.snapshot = U32_MAX;
- ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i,
- NULL, NULL, 0);
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0);
if (ret)
return ret;
ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_snapshot_node_create(&trans, U32_MAX,
+ bch2_snapshot_node_create(trans, U32_MAX,
snapids,
snapid_subvols,
2));
@@ -609,38 +589,34 @@ static u64 test_rand(void)
static int rand_insert(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct bkey_i_cookie k;
int ret = 0;
u64 i;
- bch2_trans_init(&trans, c, 0, 0);
-
for (i = 0; i < nr; i++) {
bkey_cookie_init(&k.k_i);
k.k.p.offset = test_rand();
k.k.p.snapshot = U32_MAX;
- ret = commit_do(&trans, NULL, NULL, 0,
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i, 0));
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0));
if (ret)
break;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
static int rand_insert_multi(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct bkey_i_cookie k[8];
int ret = 0;
unsigned j;
u64 i;
- bch2_trans_init(&trans, c, 0, 0);
-
for (i = 0; i < nr; i += ARRAY_SIZE(k)) {
for (j = 0; j < ARRAY_SIZE(k); j++) {
bkey_cookie_init(&k[j].k_i);
@@ -648,46 +624,45 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
k[j].k.p.snapshot = U32_MAX;
}
- ret = commit_do(&trans, NULL, NULL, 0,
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?:
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?:
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?:
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?:
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?:
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?:
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?:
- __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[7].k_i, 0));
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0));
if (ret)
break;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
static int rand_lookup(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
u64 i;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
for (i = 0; i < nr; i++) {
bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX));
- lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
ret = bkey_err(k);
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
@@ -719,26 +694,25 @@ static int rand_mixed_trans(struct btree_trans *trans,
static int rand_mixed(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_i_cookie cookie;
int ret = 0;
u64 i, rand;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
for (i = 0; i < nr; i++) {
rand = test_rand();
- ret = commit_do(&trans, NULL, NULL, 0,
- rand_mixed_trans(&trans, &iter, &cookie, i, rand));
+ ret = commit_do(trans, NULL, NULL, 0,
+ rand_mixed_trans(trans, &iter, &cookie, i, rand));
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
return ret;
}
@@ -766,22 +740,20 @@ err:
static int rand_delete(struct bch_fs *c, u64 nr)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
int ret = 0;
u64 i;
- bch2_trans_init(&trans, c, 0, 0);
-
for (i = 0; i < nr; i++) {
struct bpos pos = SPOS(0, test_rand(), U32_MAX);
- ret = commit_do(&trans, NULL, NULL, 0,
- __do_delete(&trans, pos));
+ ret = commit_do(trans, NULL, NULL, 0,
+ __do_delete(trans, pos));
if (ret)
break;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
@@ -794,14 +766,14 @@ static int seq_insert(struct bch_fs *c, u64 nr)
bkey_cookie_init(&insert.k_i);
return bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k,
NULL, NULL, 0, ({
if (iter.pos.offset >= nr)
break;
insert.k.p = iter.pos;
- bch2_trans_update(&trans, &iter, &insert.k_i, 0);
+ bch2_trans_update(trans, &iter, &insert.k_i, 0);
})));
}
@@ -811,7 +783,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
struct bkey_s_c k;
return bch2_trans_run(c,
- for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs,
+ for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
0, k,
0));
@@ -823,14 +795,14 @@ static int seq_overwrite(struct bch_fs *c, u64 nr)
struct bkey_s_c k;
return bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX),
BTREE_ITER_INTENT, k,
NULL, NULL, 0, ({
struct bkey_i_cookie u;
bkey_reassemble(&u.k_i, k);
- bch2_trans_update(&trans, &iter, &u.k_i, 0);
+ bch2_trans_update(trans, &iter, &u.k_i, 0);
})));
}
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index 97fe774..1926449 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -137,6 +137,25 @@ DEFINE_EVENT(bio, read_promote,
TP_ARGS(bio)
);
+TRACE_EVENT(read_nopromote,
+ TP_PROTO(struct bch_fs *c, int ret),
+ TP_ARGS(c, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __array(char, ret, 32 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
+ ),
+
+ TP_printk("%d,%d ret %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ret)
+);
+
DEFINE_EVENT(bio, read_bounce,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 80a6c56..adeec80 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -112,10 +112,10 @@ got_unit:
#define parse_or_ret(cp, _f) \
do { \
- int ret = _f; \
- if (ret < 0) \
- return ret; \
- cp += ret; \
+ int _ret = _f; \
+ if (_ret < 0) \
+ return _ret; \
+ cp += _ret; \
} while (0)
static int __bch2_strtou64_h(const char *cp, u64 *res)
@@ -605,11 +605,9 @@ void bch2_time_stats_init(struct bch2_time_stats *stats)
/**
* bch2_ratelimit_delay() - return how long to delay until the next time to do
- * some work
- *
- * @d - the struct bch_ratelimit to update
- *
- * Returns the amount of time to delay by, in jiffies
+ * some work
+ * @d: the struct bch_ratelimit to update
+ * Returns: the amount of time to delay by, in jiffies
*/
u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
{
@@ -622,9 +620,8 @@ u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
/**
* bch2_ratelimit_increment() - increment @d by the amount of work done
- *
- * @d - the struct bch_ratelimit to update
- * @done - the amount of work done, in arbitrary units
+ * @d: the struct bch_ratelimit to update
+ * @done: the amount of work done, in arbitrary units
*/
void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
{
@@ -761,10 +758,10 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size)
}
}
-int bch2_bio_alloc_pages_noprof(struct bio *bio, size_t size, gfp_t gfp_mask)
+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
{
while (size) {
- struct page *page = alloc_pages_noprof(gfp_mask, 0);
+ struct page *page = alloc_pages(gfp_mask, 0);
unsigned len = min_t(size_t, PAGE_SIZE, size);
if (!page)
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index d06671a..67f1a1d 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -60,13 +60,12 @@ static inline void vpfree(void *p, size_t size)
free_pages((unsigned long) p, get_order(size));
}
-static inline void *vpmalloc_noprof(size_t size, gfp_t gfp_mask)
+static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
{
- return (void *) get_free_pages_noprof(gfp_mask|__GFP_NOWARN,
- get_order(size)) ?:
- __vmalloc_noprof(size, gfp_mask);
+ return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
+ get_order(size)) ?:
+ __vmalloc(size, gfp_mask);
}
-#define vpmalloc(_size, _gfp) alloc_hooks(vpmalloc_noprof(_size, _gfp))
static inline void kvpfree(void *p, size_t size)
{
@@ -76,13 +75,12 @@ static inline void kvpfree(void *p, size_t size)
vpfree(p, size);
}
-static inline void *kvpmalloc_noprof(size_t size, gfp_t gfp_mask)
+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
{
return size < PAGE_SIZE
- ? kmalloc_noprof(size, gfp_mask)
- : vpmalloc_noprof(size, gfp_mask);
+ ? kmalloc(size, gfp_mask)
+ : vpmalloc(size, gfp_mask);
}
-#define kvpmalloc(_size, _gfp) alloc_hooks(kvpmalloc_noprof(_size, _gfp))
int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);
@@ -534,9 +532,7 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
}
void bch2_bio_map(struct bio *bio, void *base, size_t);
-int bch2_bio_alloc_pages_noprof(struct bio *, size_t, gfp_t);
-#define bch2_bio_alloc_pages(_bio, _size, _gfp) \
- alloc_hooks(bch2_bio_alloc_pages_noprof(_bio, _size, _gfp))
+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
static inline sector_t bdev_sectors(struct block_device *bdev)
{
@@ -779,12 +775,12 @@ static inline void __move_gap(void *array, size_t element_size,
#define bubble_sort(_base, _nr, _cmp) \
do { \
- ssize_t _i, _end; \
+ ssize_t _i, _last; \
bool _swapped = true; \
\
- for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\
+ for (_last= (ssize_t) (_nr) - 1; _last > 0 && _swapped; --_last) {\
_swapped = false; \
- for (_i = 0; _i < _end; _i++) \
+ for (_i = 0; _i < _last; _i++) \
if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \
swap((_base)[_i], (_base)[_i + 1]); \
_swapped = true; \
diff --git a/libbcachefs/varint.c b/libbcachefs/varint.c
index 2a2ab86..cb4f33e 100644
--- a/libbcachefs/varint.c
+++ b/libbcachefs/varint.c
@@ -13,10 +13,9 @@
/**
* bch2_varint_encode - encode a variable length integer
- * @out - destination to encode to
- * @v - unsigned integer to encode
- *
- * Returns the size in bytes of the encoded integer - at most 9 bytes
+ * @out: destination to encode to
+ * @v: unsigned integer to encode
+ * Returns: size in bytes of the encoded integer - at most 9 bytes
*/
int bch2_varint_encode(u8 *out, u64 v)
{
@@ -40,11 +39,10 @@ int bch2_varint_encode(u8 *out, u64 v)
/**
* bch2_varint_decode - encode a variable length integer
- * @in - varint to decode
- * @end - end of buffer to decode from
- * @out - on success, decoded integer
- *
- * Returns the size in bytes of the decoded integer - or -1 on failure (would
+ * @in: varint to decode
+ * @end: end of buffer to decode from
+ * @out: on success, decoded integer
+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
* have read past the end of the buffer)
*/
int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
@@ -73,6 +71,9 @@ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
/**
* bch2_varint_encode_fast - fast version of bch2_varint_encode
+ * @out: destination to encode to
+ * @v: unsigned integer to encode
+ * Returns: size in bytes of the encoded integer - at most 9 bytes
*
* This version assumes it's always safe to write 8 bytes to @out, even if the
* encoded integer would be smaller.
@@ -96,6 +97,11 @@ int bch2_varint_encode_fast(u8 *out, u64 v)
/**
* bch2_varint_decode_fast - fast version of bch2_varint_decode
+ * @in: varint to decode
+ * @end: end of buffer to decode from
+ * @out: on success, decoded integer
+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
+ * have read past the end of the buffer)
*
* This version assumes that it is safe to read at most 8 bytes past the end of
* @end (we still return an error if the varint extends past @end).
diff --git a/libbcachefs/vstructs.h b/libbcachefs/vstructs.h
index 53a694d..a6561b4 100644
--- a/libbcachefs/vstructs.h
+++ b/libbcachefs/vstructs.h
@@ -41,11 +41,11 @@
(round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9)
#define vstruct_next(_s) \
- ((typeof(_s)) ((_s)->_data + __vstruct_u64s(_s)))
+ ((typeof(_s)) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
#define vstruct_last(_s) \
- ((typeof(&(_s)->start[0])) ((_s)->_data + __vstruct_u64s(_s)))
+ ((typeof(&(_s)->start[0])) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
#define vstruct_end(_s) \
- ((void *) ((_s)->_data + __vstruct_u64s(_s)))
+ ((void *) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
#define vstruct_for_each(_s, _i) \
for (_i = (_s)->start; \
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index 6f6b3ca..b069b1a 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "acl.h"
#include "bkey_methods.h"
#include "btree_update.h"
#include "extents.h"
@@ -130,6 +131,13 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
xattr.v->x_name,
le16_to_cpu(xattr.v->x_val_len),
(char *) xattr_val(xattr.v));
+
+ if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS ||
+ xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT) {
+ prt_char(out, ' ');
+ bch2_acl_to_text(out, xattr_val(xattr.v),
+ le16_to_cpu(xattr.v->x_val_len));
+ }
}
static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info *inode,
@@ -299,24 +307,22 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct bch_fs *c = dentry->d_sb->s_fs_info;
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
u64 offset = 0, inum = inode->ei_inode.bi_inum;
u32 snapshot;
int ret;
-
- bch2_trans_init(&trans, c, 0, 0);
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
iter = (struct btree_iter) { NULL };
- ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
if (ret)
goto err;
- for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs,
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_xattrs,
SPOS(inum, offset, snapshot),
POS(inum, U64_MAX), 0, k, ret) {
if (k.k->type != KEY_TYPE_xattr)
@@ -328,12 +334,12 @@ retry:
}
offset = iter.pos.offset;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
goto out;
@@ -358,7 +364,7 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler,
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_xattr_get_trans(&trans, inode, name, buffer, size, handler->flags));
+ bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags));
return bch2_err_class(ret);
}
@@ -373,18 +379,14 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct bch_inode_unpacked inode_u;
- struct btree_trans trans;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- ret = commit_do(&trans, NULL, NULL, 0,
- bch2_xattr_set(&trans, inode_inum(inode), &inode_u,
+ ret = bch2_trans_run(c,
+ commit_do(trans, NULL, NULL, 0,
+ bch2_xattr_set(trans, inode_inum(inode), &inode_u,
&hash, name, value, size,
- handler->flags, flags));
- if (!ret)
- bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
- bch2_trans_exit(&trans);
+ handler->flags, flags)) ?:
+ (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0));
return bch2_err_class(ret);
}
diff --git a/linux/blkdev.c b/linux/blkdev.c
index ea901a4..54af9f8 100644
--- a/linux/blkdev.c
+++ b/linux/blkdev.c
@@ -162,7 +162,7 @@ sector_t get_capacity(struct gendisk *disk)
return bytes >> 9;
}
-void blkdev_put(struct block_device *bdev, fmode_t mode)
+void blkdev_put(struct block_device *bdev, void *holder)
{
fdatasync(bdev->bd_fd);
close(bdev->bd_sync_fd);
@@ -170,25 +170,25 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
free(bdev);
}
-struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder)
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hop)
{
struct block_device *bdev;
int fd, sync_fd, buffered_fd, flags = 0;
- if ((mode & (FMODE_READ|FMODE_WRITE)) == (FMODE_READ|FMODE_WRITE))
+ if ((mode & (BLK_OPEN_READ|BLK_OPEN_WRITE)) == (BLK_OPEN_READ|BLK_OPEN_WRITE))
flags = O_RDWR;
- else if (mode & FMODE_READ)
+ else if (mode & BLK_OPEN_READ)
flags = O_RDONLY;
- else if (mode & FMODE_WRITE)
+ else if (mode & BLK_OPEN_WRITE)
flags = O_WRONLY;
- if (!(mode & FMODE_BUFFERED))
+ if (!(mode & BLK_OPEN_BUFFERED))
flags |= O_DIRECT;
#if 0
/* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */
- if (mode & FMODE_EXCL)
+ if (mode & BLK_OPEN_EXCL)
flags |= O_EXCL;
#endif
buffered_fd = open(path, flags & ~O_DIRECT);
diff --git a/rust-src/bch_bindgen/src/bkey.rs b/rust-src/bch_bindgen/src/bkey.rs
index 64697ea..d483083 100644
--- a/rust-src/bch_bindgen/src/bkey.rs
+++ b/rust-src/bch_bindgen/src/bkey.rs
@@ -47,6 +47,8 @@ pub enum BkeyValC<'a> {
inode_v3(&'a c::bch_inode_v3),
bucket_gens(&'a c::bch_bucket_gens),
snapshot_tree(&'a c::bch_snapshot_tree),
+ logged_op_truncate(&'a c::bch_logged_op_truncate),
+ logged_op_finsert(&'a c::bch_logged_op_finsert),
}
impl<'a, 'b> BkeySC<'a> {
@@ -96,6 +98,8 @@ impl<'a, 'b> BkeySC<'a> {
KEY_TYPE_inode_v3 => inode_v3(unsafe { transmute(self.v) }),
KEY_TYPE_bucket_gens => bucket_gens(unsafe { transmute(self.v) }),
KEY_TYPE_snapshot_tree => snapshot_tree(unsafe { transmute(self.v) }),
+ KEY_TYPE_logged_op_truncate => logged_op_truncate(unsafe { transmute(self.v) }),
+ KEY_TYPE_logged_op_finsert => logged_op_finsert(unsafe { transmute(self.v) }),
KEY_TYPE_MAX => unreachable!(),
}
}
diff --git a/rust-src/bch_bindgen/src/btree.rs b/rust-src/bch_bindgen/src/btree.rs
index 32b4e74..f738a46 100644
--- a/rust-src/bch_bindgen/src/btree.rs
+++ b/rust-src/bch_bindgen/src/btree.rs
@@ -11,24 +11,21 @@ use std::ptr;
use bitflags::bitflags;
pub struct BtreeTrans<'f> {
- raw: c::btree_trans,
+ raw: *mut c::btree_trans,
fs: PhantomData<&'f Fs>
}
impl<'f> BtreeTrans<'f> {
pub fn new(fs: &'f Fs) -> BtreeTrans {
unsafe {
- let mut trans: MaybeUninit<c::btree_trans> = MaybeUninit::uninit();
-
- c::__bch2_trans_init(&mut (*trans.as_mut_ptr()), fs.raw, 0);
- BtreeTrans { raw: trans.assume_init(), fs: PhantomData }
+ BtreeTrans { raw: &mut *c::__bch2_trans_get(fs.raw, 0), fs: PhantomData }
}
}
}
impl<'f> Drop for BtreeTrans<'f> {
fn drop(&mut self) {
- unsafe { c::bch2_trans_exit(&mut self.raw) }
+ unsafe { c::bch2_trans_put(&mut *self.raw) }
}
}
@@ -64,9 +61,9 @@ impl<'t> BtreeIter<'t> {
let mut iter: MaybeUninit<c::btree_iter> = MaybeUninit::uninit();
c::bch2_trans_iter_init_outlined(
- ptr::addr_of!(trans.raw).cast_mut(),
+ trans.raw,
iter.as_mut_ptr(),
- btree as u32,
+ btree,
pos,
flags.bits as u32);
@@ -123,7 +120,7 @@ impl<'t> BtreeNodeIter<'t> {
unsafe {
let mut iter: MaybeUninit<c::btree_iter> = MaybeUninit::uninit();
c::bch2_trans_node_iter_init(
- ptr::addr_of!(trans.raw).cast_mut(),
+ trans.raw,
iter.as_mut_ptr(),
btree,
pos,
diff --git a/rust-src/bch_bindgen/src/libbcachefs_wrapper.h b/rust-src/bch_bindgen/src/libbcachefs_wrapper.h
index e7bcfcf..e68de66 100644
--- a/rust-src/bch_bindgen/src/libbcachefs_wrapper.h
+++ b/rust-src/bch_bindgen/src/libbcachefs_wrapper.h
@@ -13,8 +13,8 @@
#include "../include/linux/blkdev.h"
-#define MARK_FIX_753(req_name) const fmode_t Fix753_##req_name = req_name;
+#define MARK_FIX_753(req_name) const blk_mode_t Fix753_##req_name = req_name;
-MARK_FIX_753(FMODE_READ);
-MARK_FIX_753(FMODE_WRITE);
-MARK_FIX_753(FMODE_EXCL); \ No newline at end of file
+MARK_FIX_753(BLK_OPEN_READ);
+MARK_FIX_753(BLK_OPEN_WRITE);
+MARK_FIX_753(BLK_OPEN_EXCL);