summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2025-04-20 17:01:39 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2025-04-21 15:47:15 -0400
commit6a83d70ef53042aa12bf0ea1f08b7b237783f114 (patch)
treef6442be818d4f7f043b4cb2b8caf34ea51ceeb92
parent98f2c06d418d85557f39c6541295766457d68d53 (diff)
Update bcachefs sources to c9d875f9be1f bcachefs: Casefold is now a regular opts.h option
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--.bcachefs_revision2
-rw-r--r--Makefile1
-rw-r--r--Makefile.compiler4
-rw-r--r--c_src/cmd_device.c12
-rw-r--r--c_src/cmd_dump.c4
-rw-r--r--include/linux/completion.h1
-rw-r--r--libbcachefs/alloc_background.c68
-rw-r--r--libbcachefs/alloc_foreground.c10
-rw-r--r--libbcachefs/async_objs.h44
-rw-r--r--libbcachefs/async_objs_types.h24
-rw-r--r--libbcachefs/backpointers.c6
-rw-r--r--libbcachefs/bcachefs.h113
-rw-r--r--libbcachefs/bcachefs_format.h1
-rw-r--r--libbcachefs/btree_gc.c7
-rw-r--r--libbcachefs/btree_io.c41
-rw-r--r--libbcachefs/btree_io.h8
-rw-r--r--libbcachefs/btree_node_scan.c10
-rw-r--r--libbcachefs/btree_trans_commit.c5
-rw-r--r--libbcachefs/btree_update_interior.c7
-rw-r--r--libbcachefs/btree_write_buffer.c11
-rw-r--r--libbcachefs/buckets.c4
-rw-r--r--libbcachefs/chardev.c6
-rw-r--r--libbcachefs/data_update.c18
-rw-r--r--libbcachefs/data_update.h15
-rw-r--r--libbcachefs/debug.c64
-rw-r--r--libbcachefs/debug.h18
-rw-r--r--libbcachefs/disk_groups.c8
-rw-r--r--libbcachefs/ec.c32
-rw-r--r--libbcachefs/enumerated_ref.c144
-rw-r--r--libbcachefs/enumerated_ref.h66
-rw-r--r--libbcachefs/enumerated_ref_types.h19
-rw-r--r--libbcachefs/errcode.h1
-rw-r--r--libbcachefs/fast_list.h41
-rw-r--r--libbcachefs/fs-io-direct.c7
-rw-r--r--libbcachefs/fs-io.c15
-rw-r--r--libbcachefs/fs-ioctl.c217
-rw-r--r--libbcachefs/fs-ioctl.h75
-rw-r--r--libbcachefs/fs.c193
-rw-r--r--libbcachefs/inode_format.h9
-rw-r--r--libbcachefs/io_read.c93
-rw-r--r--libbcachefs/io_read.h14
-rw-r--r--libbcachefs/io_write.c20
-rw-r--r--libbcachefs/journal.c14
-rw-r--r--libbcachefs/journal_io.c20
-rw-r--r--libbcachefs/journal_reclaim.c6
-rw-r--r--libbcachefs/movinggc.c11
-rw-r--r--libbcachefs/namei.c4
-rw-r--r--libbcachefs/opts.h5
-rw-r--r--libbcachefs/rebalance.c7
-rw-r--r--libbcachefs/recovery.c4
-rw-r--r--libbcachefs/recovery_passes.c70
-rw-r--r--libbcachefs/reflink.c5
-rw-r--r--libbcachefs/sb-members.h49
-rw-r--r--libbcachefs/snapshot.c7
-rw-r--r--libbcachefs/str_hash.h2
-rw-r--r--libbcachefs/subvolume.c7
-rw-r--r--libbcachefs/super-io.c21
-rw-r--r--libbcachefs/super.c208
-rw-r--r--libbcachefs/super.h3
-rw-r--r--libbcachefs/sysfs.c46
-rw-r--r--libbcachefs/util.c10
-rw-r--r--libbcachefs/util.h40
-rw-r--r--linux/wait.c25
63 files changed, 1258 insertions, 764 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index de77df06..5fe33c98 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-7307b739bbe5f1d9415f5c1da070723b3fb5abbd
+c9d875f9be1f853e747c9e00421c678b0adf73d2
diff --git a/Makefile b/Makefile
index 8cc1c9b4..3b25cf81 100644
--- a/Makefile
+++ b/Makefile
@@ -255,6 +255,7 @@ update-bcachefs-sources:
git rm -rf --ignore-unmatch libbcachefs
test -d libbcachefs || mkdir libbcachefs
cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/
+ rm libbcachefs/fast_list.c libbcachefs/async_objs.c
git add libbcachefs/*.[ch]
git rm -f libbcachefs/mean_and_variance_test.c
cp $(LINUX_DIR)/include/linux/closure.h include/linux/
diff --git a/Makefile.compiler b/Makefile.compiler
index 8956587b..f4fcc1ea 100644
--- a/Makefile.compiler
+++ b/Makefile.compiler
@@ -79,8 +79,8 @@ ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
# Usage: MY_RUSTFLAGS += $(call __rustc-option,$(RUSTC),$(MY_RUSTFLAGS),-Cinstrument-coverage,-Zinstrument-coverage)
# TODO: remove RUSTC_BOOTSTRAP=1 when we raise the minimum GNU Make version to 4.4
__rustc-option = $(call try-run,\
- echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | RUSTC_BOOTSTRAP=1\
- $(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null,$(2)) $(3)\
+ echo '$(pound)![allow(missing_docs)]$(pound)![feature(no_core)]$(pound)![no_core]' | RUSTC_BOOTSTRAP=1\
+ $(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null --target=%,$(2)) $(3)\
--crate-type=rlib --out-dir=$(TMPOUT) --emit=obj=- - >/dev/null,$(3),$(4))
# rustc-option
diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c
index 1fa87a18..40ac87c7 100644
--- a/c_src/cmd_device.c
+++ b/c_src/cmd_device.c
@@ -521,11 +521,11 @@ static int cmd_device_resize(int argc, char *argv[])
struct bch_dev *resize = NULL;
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, 0) {
if (resize)
die("confused: more than one online device?");
resize = ca;
- percpu_ref_get(&resize->io_ref[READ]);
+ enumerated_ref_get(&resize->io_ref[READ], 0);
}
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@@ -538,7 +538,7 @@ static int cmd_device_resize(int argc, char *argv[])
if (ret)
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
- percpu_ref_put(&resize->io_ref[READ]);
+ enumerated_ref_put(&resize->io_ref[READ], 0);
bch2_fs_stop(c);
}
return 0;
@@ -618,11 +618,11 @@ static int cmd_device_resize_journal(int argc, char *argv[])
struct bch_dev *resize = NULL;
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, 0) {
if (resize)
die("confused: more than one online device?");
resize = ca;
- percpu_ref_get(&resize->io_ref[READ]);
+ enumerated_ref_get(&resize->io_ref[READ], 0);
}
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@@ -632,7 +632,7 @@ static int cmd_device_resize_journal(int argc, char *argv[])
if (ret)
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
- percpu_ref_put(&resize->io_ref[READ]);
+ enumerated_ref_put(&resize->io_ref[READ], 0);
bch2_fs_stop(c);
}
return 0;
diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c
index 04f099eb..7d7ceaa5 100644
--- a/c_src/cmd_dump.c
+++ b/c_src/cmd_dump.c
@@ -153,12 +153,12 @@ int cmd_dump(int argc, char *argv[])
down_read(&c->state_lock);
- for_each_online_member(c, ca)
+ for_each_online_member(c, ca, 0)
nr_devices++;
BUG_ON(!nr_devices);
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, 0) {
int flags = O_WRONLY|O_CREAT|O_TRUNC;
if (!force)
diff --git a/include/linux/completion.h b/include/linux/completion.h
index d11a8dd0..3a5d7c6a 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -36,6 +36,7 @@ static inline void reinit_completion(struct completion *x)
void complete(struct completion *);
void wait_for_completion(struct completion *);
+unsigned long wait_for_completion_timeout(struct completion *, unsigned long);
#define wait_for_completion_interruptible(x) (wait_for_completion(x), 0)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 8b8c2344..37cbbf86 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -17,6 +17,7 @@
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
@@ -1381,7 +1382,7 @@ static void check_discard_freespace_key_work(struct work_struct *work)
container_of(work, struct check_discard_freespace_key_async, work);
bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos));
- bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key);
+ enumerated_ref_put(&w->c->writes, BCH_WRITE_REF_check_discard_freespace_key);
kfree(w);
}
@@ -1458,7 +1459,7 @@ delete:
if (!w)
goto out;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) {
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_check_discard_freespace_key)) {
kfree(w);
goto out;
}
@@ -1952,26 +1953,26 @@ static void bch2_do_discards_work(struct work_struct *work)
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
- percpu_ref_put(&ca->io_ref[WRITE]);
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
void bch2_dev_do_discards(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_dev_do_discards))
goto put_write_ref;
if (queue_work(c->write_ref_wq, &ca->discard_work))
return;
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
put_write_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
void bch2_do_discards(struct bch_fs *c)
@@ -2047,8 +2048,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
bch2_trans_put(trans);
- percpu_ref_put(&ca->io_ref[WRITE]);
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast);
}
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
@@ -2058,18 +2059,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
if (discard_in_flight_add(ca, bucket, false))
return;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard_fast))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_discard_one_bucket_fast))
goto put_ref;
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
put_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast);
}
static int invalidate_one_bp(struct btree_trans *trans,
@@ -2261,27 +2262,27 @@ restart_err:
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
- percpu_ref_put(&ca->io_ref[WRITE]);
bch2_bkey_buf_exit(&last_flushed, c);
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
}
void bch2_dev_do_invalidates(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_invalidate))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_do_invalidates))
goto put_ref;
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
put_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
}
void bch2_do_invalidates(struct bch_fs *c)
@@ -2503,15 +2504,15 @@ void bch2_recalc_capacity(struct bch_fs *c)
lockdep_assert_held(&c->state_lock);
- for_each_online_member(c, ca) {
- struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;
-
- ra_pages += bdi->ra_pages;
- }
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, NULL) {
+ struct block_device *bdev = READ_ONCE(ca->disk_sb.bdev);
+ if (bdev)
+ ra_pages += bdev->bd_disk->bdi->ra_pages;
- bch2_set_ra_pages(c, ra_pages);
+ if (ca->mi.state != BCH_MEMBER_STATE_rw)
+ continue;
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
u64 dev_reserve = 0;
/*
@@ -2548,6 +2549,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
bucket_size_max = max_t(unsigned, bucket_size_max,
ca->mi.bucket_size);
}
+ rcu_read_unlock();
+
+ bch2_set_ra_pages(c, ra_pages);
gc_reserve = c->opts.gc_reserve_bytes
? c->opts.gc_reserve_bytes >> 9
@@ -2570,8 +2574,10 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
u64 ret = U64_MAX;
- for_each_rw_member(c, ca)
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca)
ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
+ rcu_read_unlock();
return ret;
}
@@ -2595,8 +2601,12 @@ static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
{
+ /* BCH_DATA_free == all rw devs */
+
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
- if (rw && (ca->mi.data_allowed & BIT(i)))
+ if (rw &&
+ (i == BCH_DATA_free ||
+ (ca->mi.data_allowed & BIT(i))))
set_bit(ca->dev_idx, c->rw_devs[i].d);
else
clear_bit(ca->dev_idx, c->rw_devs[i].d);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 59950d73..ac1244e7 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -1623,7 +1623,12 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
- for_each_online_member(c, ca) {
+ bch2_printbuf_make_room(&buf, 4096);
+
+ rcu_read_lock();
+ buf.atomic++;
+
+ for_each_online_member_rcu(c, ca) {
prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
printbuf_indent_add(&buf, 2);
bch2_dev_alloc_debug_to_text(&buf, ca);
@@ -1631,6 +1636,9 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
prt_newline(&buf);
}
+ --buf.atomic;
+ rcu_read_unlock();
+
prt_printf(&buf, "Copygc debug:\n");
printbuf_indent_add(&buf, 2);
bch2_copygc_wait_to_text(&buf, c);
diff --git a/libbcachefs/async_objs.h b/libbcachefs/async_objs.h
new file mode 100644
index 00000000..cd6489b8
--- /dev/null
+++ b/libbcachefs/async_objs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ASYNC_OBJS_H
+#define _BCACHEFS_ASYNC_OBJS_H
+
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+static inline void __async_object_list_del(struct fast_list *head, unsigned idx)
+{
+ fast_list_remove(head, idx);
+}
+
+static inline int __async_object_list_add(struct fast_list *head, void *obj, unsigned *idx)
+{
+ int ret = fast_list_add(head, obj);
+ *idx = ret > 0 ? ret : 0;
+ return ret < 0 ? ret : 0;
+}
+
+#define async_object_list_del(_c, _list, idx) \
+ __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx)
+
+#define async_object_list_add(_c, _list, obj, idx) \
+ __async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx)
+
+void bch2_fs_async_obj_debugfs_init(struct bch_fs *);
+void bch2_fs_async_obj_exit(struct bch_fs *);
+int bch2_fs_async_obj_init(struct bch_fs *);
+
+#else /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */
+
+#define async_object_list_del(_c, _n, idx) do {} while (0)
+
+static inline int __async_object_list_add(void)
+{
+ return 0;
+}
+#define async_object_list_add(_c, _n, obj, idx) __async_object_list_add()
+
+static inline void bch2_fs_async_obj_debugfs_init(struct bch_fs *c) {}
+static inline void bch2_fs_async_obj_exit(struct bch_fs *c) {}
+static inline int bch2_fs_async_obj_init(struct bch_fs *c) { return 0; }
+
+#endif /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */
+
+#endif /* _BCACHEFS_ASYNC_OBJS_H */
diff --git a/libbcachefs/async_objs_types.h b/libbcachefs/async_objs_types.h
new file mode 100644
index 00000000..310a4f90
--- /dev/null
+++ b/libbcachefs/async_objs_types.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ASYNC_OBJS_TYPES_H
+#define _BCACHEFS_ASYNC_OBJS_TYPES_H
+
+#define BCH_ASYNC_OBJ_LISTS() \
+ x(promote) \
+ x(rbio) \
+ x(btree_read_bio) \
+ x(btree_write_bio)
+
+enum bch_async_obj_lists {
+#define x(n) BCH_ASYNC_OBJ_LIST_##n,
+ BCH_ASYNC_OBJ_LISTS()
+#undef x
+ BCH_ASYNC_OBJ_NR
+};
+
+struct async_obj_list {
+ struct fast_list list;
+ void (*obj_to_text)(struct printbuf *, void *);
+ unsigned idx;
+};
+
+#endif /* _BCACHEFS_ASYNC_OBJS_TYPES_H */
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index ff26bb51..b870eedf 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -437,7 +437,8 @@ found:
bytes = p.crc.compressed_size << 9;
- struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ,
+ BCH_DEV_READ_REF_check_extent_checksums);
if (!ca)
return false;
@@ -474,7 +475,8 @@ err:
if (bio)
bio_put(bio);
kvfree(data_buf);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_check_extent_checksums);
printbuf_exit(&buf);
return ret;
}
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index a1a4e941..675b8f8e 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -209,6 +209,7 @@
#include "btree_journal_iter_types.h"
#include "disk_accounting_types.h"
#include "errcode.h"
+#include "fast_list.h"
#include "fifo.h"
#include "nocow_locking_types.h"
#include "opts.h"
@@ -219,7 +220,7 @@
#include "util.h"
#ifdef CONFIG_BCACHEFS_DEBUG
-#define BCH_WRITE_REF_DEBUG
+#define ENUMERATED_REF_DEBUG
#endif
#ifndef dynamic_fault
@@ -474,6 +475,7 @@ enum bch_time_stats {
};
#include "alloc_types.h"
+#include "async_objs_types.h"
#include "btree_gc_types.h"
#include "btree_types.h"
#include "btree_node_scan_types.h"
@@ -483,6 +485,7 @@ enum bch_time_stats {
#include "clock_types.h"
#include "disk_groups_types.h"
#include "ec_types.h"
+#include "enumerated_ref_types.h"
#include "journal_types.h"
#include "keylist_types.h"
#include "quota_types.h"
@@ -515,6 +518,51 @@ struct discard_in_flight {
u64 bucket:63;
};
+#define BCH_DEV_READ_REFS() \
+ x(bch2_online_devs) \
+ x(trans_mark_dev_sbs) \
+ x(read_fua_test) \
+ x(sb_field_resize) \
+ x(write_super) \
+ x(journal_read) \
+ x(fs_journal_alloc) \
+ x(fs_resize_on_mount) \
+ x(btree_node_read) \
+ x(btree_node_read_all_replicas) \
+ x(btree_node_scrub) \
+ x(btree_node_write) \
+ x(btree_node_scan) \
+ x(btree_verify_replicas) \
+ x(btree_node_ondisk_to_text) \
+ x(io_read) \
+ x(check_extent_checksums) \
+ x(ec_block)
+
+enum bch_dev_read_ref {
+#define x(n) BCH_DEV_READ_REF_##n,
+ BCH_DEV_READ_REFS()
+#undef x
+ BCH_DEV_READ_REF_NR,
+};
+
+#define BCH_DEV_WRITE_REFS() \
+ x(journal_write) \
+ x(journal_do_discards) \
+ x(dev_do_discards) \
+ x(discard_one_bucket_fast) \
+ x(do_invalidates) \
+ x(nocow_flush) \
+ x(io_write) \
+ x(ec_block) \
+ x(ec_bucket_zero)
+
+enum bch_dev_write_ref {
+#define x(n) BCH_DEV_WRITE_REF_##n,
+ BCH_DEV_WRITE_REFS()
+#undef x
+ BCH_DEV_WRITE_REF_NR,
+};
+
struct bch_dev {
struct kobject kobj;
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -525,8 +573,7 @@ struct bch_dev {
struct percpu_ref ref;
#endif
struct completion ref_completion;
- struct percpu_ref io_ref[2];
- struct completion io_ref_completion[2];
+ struct enumerated_ref io_ref[2];
struct bch_fs *fs;
@@ -733,11 +780,7 @@ struct bch_fs {
struct rw_semaphore state_lock;
/* Counts outstanding writes, for clean transition to read-only */
-#ifdef BCH_WRITE_REF_DEBUG
- atomic_long_t writes[BCH_WRITE_REF_NR];
-#else
- struct percpu_ref writes;
-#endif
+ struct enumerated_ref writes;
/*
* Certain operations are only allowed in single threaded mode, during
* recovery, and we want to assert that this is the case:
@@ -891,6 +934,7 @@ struct bch_fs {
struct workqueue_struct *write_ref_wq;
/* ALLOCATION */
+ struct bch_devs_mask online_devs;
struct bch_devs_mask rw_devs[BCH_DATA_NR];
unsigned long rw_devs_change_count;
@@ -985,6 +1029,10 @@ struct bch_fs {
nocow_locks;
struct rhashtable promote_table;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR];
+#endif
+
mempool_t compression_bounce[2];
mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR];
size_t zstd_workspace_size;
@@ -1073,6 +1121,7 @@ struct bch_fs {
/* DEBUG JUNK */
struct dentry *fs_debug_dir;
struct dentry *btree_debug_dir;
+ struct dentry *async_obj_dir;
struct btree_debug btree_debug[BTREE_ID_NR];
struct btree *verify_data;
struct btree_node *verify_ondisk;
@@ -1114,54 +1163,6 @@ struct bch_fs {
extern struct wait_queue_head bch2_read_only_wait;
-static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- atomic_long_inc(&c->writes[ref]);
-#else
- percpu_ref_get(&c->writes);
-#endif
-}
-
-static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- return !test_bit(BCH_FS_going_ro, &c->flags) &&
- atomic_long_inc_not_zero(&c->writes[ref]);
-#else
- return percpu_ref_tryget(&c->writes);
-#endif
-}
-
-static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- return !test_bit(BCH_FS_going_ro, &c->flags) &&
- atomic_long_inc_not_zero(&c->writes[ref]);
-#else
- return percpu_ref_tryget_live(&c->writes);
-#endif
-}
-
-static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
- long v = atomic_long_dec_return(&c->writes[ref]);
-
- BUG_ON(v < 0);
- if (v)
- return;
- for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
- if (atomic_long_read(&c->writes[i]))
- return;
-
- set_bit(BCH_FS_write_disable_complete, &c->flags);
- wake_up(&bch2_read_only_wait);
-#else
- percpu_ref_put(&c->writes);
-#endif
-}
-
static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
{
if (test_bit(BCH_FS_stopping, &c->flags))
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 9af2a607..7ce475c5 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -868,6 +868,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);
LE64_BITMASK(BCH_SB_DEGRADED_ACTION, struct bch_sb, flags[6], 20, 22);
+LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 2824a6e8..9db8f5f8 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -22,6 +22,7 @@
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
@@ -1233,14 +1234,14 @@ static void bch2_gc_gens_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
bch2_gc_gens(c);
- bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
void bch2_gc_gens_async(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_gc_gens) &&
+ if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_gc_gens) &&
!queue_work(c->write_ref_wq, &c->gc_gens_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
void bch2_fs_btree_gc_init_early(struct bch_fs *c)
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index a261e856..5cd27a9c 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "async_objs.h"
#include "bkey_buf.h"
#include "bkey_methods.h"
#include "bkey_sort.h"
@@ -13,6 +14,7 @@
#include "buckets.h"
#include "checksum.h"
#include "debug.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
@@ -1324,7 +1326,7 @@ static void btree_node_read_work(struct work_struct *work)
while (1) {
retry = true;
bch_info(c, "retrying read");
- ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ);
+ ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = ca != NULL;
rb->start_time = local_clock();
bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
@@ -1349,7 +1351,7 @@ start:
"btree read error %s for %s",
bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = false;
bch2_mark_io_failure(&failed, &rb->pick, false);
@@ -1374,6 +1376,7 @@ start:
}
}
+ async_object_list_del(c, btree_read_bio, rb->list_idx);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
rb->start_time);
bio_put(&rb->bio);
@@ -1413,6 +1416,11 @@ static void btree_node_read_endio(struct bio *bio)
queue_work(c->btree_read_complete_wq, &rb->work);
}
+void bch2_btree_read_bio_to_text(struct printbuf *out, struct btree_read_bio *rbio)
+{
+ bch2_bio_to_text(out, &rbio->bio);
+}
+
struct btree_node_read_all {
struct closure cl;
struct bch_fs *c;
@@ -1605,7 +1613,8 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
bch2_latency_acct(ca, rb->start_time, READ);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_node_read_all_replicas);
}
ra->err[rb->idx] = bio->bi_status;
@@ -1645,7 +1654,8 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_node_read_all_replicas);
struct btree_read_bio *rb =
container_of(ra->bio[i], struct btree_read_bio, bio);
rb->c = c;
@@ -1722,7 +1732,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
return;
}
- ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
bio = bio_alloc_bioset(NULL,
buf_pages(b->data, btree_buf_bytes(b)),
@@ -1741,6 +1751,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
bio->bi_end_io = btree_node_read_endio;
bch2_bio_map(bio, b->data, btree_buf_bytes(b));
+ async_object_list_add(c, btree_read_bio, rb, &rb->list_idx);
+
if (rb->have_ioref) {
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
bio_sectors(bio));
@@ -1925,9 +1937,9 @@ err:
printbuf_exit(&err);
bch2_bkey_buf_exit(&scrub->key, c);;
btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
- percpu_ref_put(&scrub->ca->io_ref[READ]);
+ enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub);
kfree(scrub);
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub);
}
static void btree_node_scrub_endio(struct bio *bio)
@@ -1946,7 +1958,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
struct bch_fs *c = trans->c;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_node_scrub))
return -BCH_ERR_erofs_no_writes;
struct extent_ptr_decoded pick;
@@ -1954,7 +1966,8 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
if (ret <= 0)
goto err;
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_node_scrub);
if (!ca) {
ret = -BCH_ERR_device_offline;
goto err;
@@ -1994,9 +2007,9 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
return 0;
err_free:
btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub);
err:
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub);
return ret;
}
@@ -2111,6 +2124,7 @@ static void btree_node_write_work(struct work_struct *work)
goto err;
}
out:
+ async_object_list_del(c, btree_write_bio, wbio->list_idx);
bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b, start_time);
return;
@@ -2162,7 +2176,8 @@ static void btree_node_write_endio(struct bio *bio)
* btree writes yet (due to device removal/ro):
*/
if (wbio->have_ioref)
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_node_write);
if (parent) {
bio_put(bio);
@@ -2461,6 +2476,8 @@ do_write:
atomic64_inc(&c->btree_write_stats[type].nr);
atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
+ async_object_list_add(c, btree_write_bio, wbio, &wbio->list_idx);
+
INIT_WORK(&wbio->work, btree_write_submit);
queue_work(c->btree_write_submit_wq, &wbio->work);
return;
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index dbf76d22..afdb11a9 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -41,6 +41,9 @@ struct btree_read_bio {
u64 start_time;
unsigned have_ioref:1;
unsigned idx:7;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
struct extent_ptr_decoded pick;
struct work_struct work;
struct bio bio;
@@ -53,6 +56,9 @@ struct btree_write_bio {
unsigned data_bytes;
unsigned sector_offset;
u64 start_time;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
struct bch_write_bio wbio;
};
@@ -133,6 +139,8 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned);
+void bch2_btree_read_bio_to_text(struct printbuf *, struct btree_read_bio *);
+
int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, unsigned);
diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c
index 81ee7ae8..7bd13438 100644
--- a/libbcachefs/btree_node_scan.c
+++ b/libbcachefs/btree_node_scan.c
@@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
bio_put(bio);
free_page((unsigned long) buf);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
closure_put(w->cl);
kfree(w);
return 0;
@@ -285,13 +285,13 @@ static int read_btree_nodes(struct find_btree_nodes *f)
closure_init_stack(&cl);
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) {
if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
continue;
struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
if (!w) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
ret = -ENOMEM;
goto err;
}
@@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f)
struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
ret = PTR_ERR_OR_ZERO(t);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
kfree(w);
bch_err_msg(c, ret, "starting kthread");
break;
}
closure_get(&cl);
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
wake_up_process(t);
}
err:
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 4297d8b5..cdde769e 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -11,6 +11,7 @@
#include "btree_write_buffer.h"
#include "buckets.h"
#include "disk_accounting.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
@@ -994,7 +995,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
goto out_reset;
if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
- unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
+ unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) {
if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags)))
ret = do_bch2_trans_commit_to_journal_replay(trans);
else
@@ -1060,7 +1061,7 @@ retry:
trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw)))
- bch2_write_ref_put(c, BCH_WRITE_REF_trans);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_trans);
out_reset:
if (!ret)
bch2_trans_downgrade(trans);
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index a0ff2dc0..fcca8b01 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -14,6 +14,7 @@
#include "btree_locking.h"
#include "buckets.h"
#include "clock.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
@@ -2326,7 +2327,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
closure_wake_up(&c->btree_node_rewrites_wait);
bch2_bkey_buf_exit(&a->key, c);
- bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
@@ -2348,7 +2349,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
spin_lock(&c->btree_node_rewrites_lock);
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay &&
- bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
+ enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
list_add(&a->list, &c->btree_node_rewrites);
now = true;
} else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {
@@ -2387,7 +2388,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
if (!a)
break;
- bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+ enumerated_ref_get(&c->writes, BCH_WRITE_REF_node_rewrite);
queue_work(c->btree_node_rewrite_worker, &a->work);
}
}
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 68ab48af..0094e434 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -7,6 +7,7 @@
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "disk_accounting.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
@@ -629,11 +630,11 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer))
return -BCH_ERR_erofs_no_writes;
int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
return ret;
}
@@ -692,7 +693,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
} while (!ret && bch2_btree_write_buffer_should_flush(c));
mutex_unlock(&wb->flushing.lock);
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
}
static void wb_accounting_sort(struct btree_write_buffer *wb)
@@ -821,9 +822,9 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_
bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
if (bch2_btree_write_buffer_should_flush(c) &&
- __bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) &&
+ __enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer) &&
!queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
if (dst->wb == &wb->flushing)
mutex_unlock(&wb->flushing.lock);
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 09e84d4a..7c267244 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -1139,10 +1139,10 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca,
int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
enum btree_iter_update_trigger_flags flags)
{
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_trans_mark_dev_sbs) {
int ret = bch2_trans_mark_dev_sb(c, ca, flags);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_trans_mark_dev_sbs);
return ret;
}
}
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
index 5891b3a1..4066946b 100644
--- a/libbcachefs/chardev.c
+++ b/libbcachefs/chardev.c
@@ -613,11 +613,13 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
if (!dev)
return -EINVAL;
- for_each_online_member(c, ca)
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
if (ca->dev == dev) {
- percpu_ref_put(&ca->io_ref[READ]);
+ rcu_read_unlock();
return ca->dev_idx;
}
+ rcu_read_unlock();
return -BCH_ERR_ENOENT_dev_idx_not_found;
}
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index c3034338..9b44f11f 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -587,6 +587,10 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
prt_str_indented(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
+ prt_newline(out);
+
+ prt_str_indented(out, "scrub:\t");
+ prt_u64(out, data_opts->scrub);
}
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
@@ -607,9 +611,17 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update
prt_newline(out);
printbuf_indent_add(out, 2);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
- prt_printf(out, "read_done:\t%u\n", m->read_done);
- bch2_write_op_to_text(out, &m->op);
- printbuf_indent_sub(out, 2);
+
+ if (!m->read_done) {
+ prt_printf(out, "read:\n");
+ printbuf_indent_add(out, 2);
+ bch2_read_bio_to_text(out, &m->rbio);
+ } else {
+ prt_printf(out, "write:\n");
+ printbuf_indent_add(out, 2);
+ bch2_write_op_to_text(out, &m->op);
+ }
+ printbuf_indent_sub(out, 4);
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index ed051258..5e14d135 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -50,6 +50,21 @@ struct data_update {
struct bio_vec *bvecs;
};
+struct promote_op {
+ struct rcu_head rcu;
+ u64 start_time;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
+
+ struct rhash_head hash;
+ struct bpos pos;
+
+ struct work_struct work;
+ struct data_update write;
+ struct bio_vec bi_inline_vecs[]; /* must be last */
+};
+
void bch2_data_update_to_text(struct printbuf *, struct data_update *);
void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *);
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 8ad710bb..c24de2e3 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -8,6 +8,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "async_objs.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_io.h"
@@ -16,6 +17,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
+#include "data_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
@@ -42,7 +44,8 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
struct bio *bio;
bool failed = false, saw_error = false;
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_verify_replicas);
if (!ca)
return false;
@@ -57,7 +60,8 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
submit_bio_wait(bio);
bio_put(bio);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_verify_replicas);
memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
@@ -196,7 +200,8 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
return;
}
- ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_btree_node_ondisk_to_text);
if (!ca) {
prt_printf(out, "error getting device to read from: not online\n");
return;
@@ -297,28 +302,13 @@ out:
if (bio)
bio_put(bio);
kvfree(n_ondisk);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_btree_node_ondisk_to_text);
}
#ifdef CONFIG_DEBUG_FS
-/* XXX: bch_fs refcounting */
-
-struct dump_iter {
- struct bch_fs *c;
- enum btree_id id;
- struct bpos from;
- struct bpos prev_node;
- u64 iter;
-
- struct printbuf buf;
-
- char __user *ubuf; /* destination user buffer */
- size_t size; /* size of requested read */
- ssize_t ret; /* bytes read so far */
-};
-
-static ssize_t flush_buf(struct dump_iter *i)
+ssize_t bch2_debugfs_flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
@@ -356,7 +346,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file)
return 0;
}
-static int bch2_dump_release(struct inode *inode, struct file *file)
+int bch2_dump_release(struct inode *inode, struct file *file)
{
struct dump_iter *i = file->private_data;
@@ -374,7 +364,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- return flush_buf(i) ?:
+ return bch2_debugfs_flush_buf(i) ?:
bch2_trans_run(i->c,
for_each_btree_key(trans, iter, i->id, i->from,
BTREE_ITER_prefetch|
@@ -383,7 +373,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
prt_newline(&i->buf);
bch2_trans_unlock(trans);
i->from = bpos_successor(iter.pos);
- flush_buf(i);
+ bch2_debugfs_flush_buf(i);
}))) ?:
i->ret;
}
@@ -404,7 +394,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- ssize_t ret = flush_buf(i);
+ ssize_t ret = bch2_debugfs_flush_buf(i);
if (ret)
return ret;
@@ -418,7 +408,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
? bpos_successor(b->key.k.p)
: b->key.k.p;
- drop_locks_do(trans, flush_buf(i));
+ drop_locks_do(trans, bch2_debugfs_flush_buf(i));
}))) ?: i->ret;
}
@@ -438,7 +428,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- return flush_buf(i) ?:
+ return bch2_debugfs_flush_buf(i) ?:
bch2_trans_run(i->c,
for_each_btree_key(trans, iter, i->id, i->from,
BTREE_ITER_prefetch|
@@ -456,7 +446,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
bch2_bfloat_to_text(&i->buf, l->b, _k);
bch2_trans_unlock(trans);
i->from = bpos_successor(iter.pos);
- flush_buf(i);
+ bch2_debugfs_flush_buf(i);
}))) ?:
i->ret;
}
@@ -517,7 +507,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
struct rhash_head *pos;
struct btree *b;
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
if (ret)
return ret;
@@ -540,7 +530,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
ret = -ENOMEM;
if (!ret)
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@@ -614,7 +604,7 @@ restart:
closure_put(&trans->ref);
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
if (ret)
goto unlocked;
@@ -627,7 +617,7 @@ unlocked:
ret = -ENOMEM;
if (!ret)
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@@ -652,7 +642,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
i->ret = 0;
while (1) {
- err = flush_buf(i);
+ err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@@ -695,7 +685,7 @@ static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
i->iter++;
}
- err = flush_buf(i);
+ err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@@ -753,7 +743,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
while (1) {
struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
- err = flush_buf(i);
+ err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@@ -874,7 +864,7 @@ static ssize_t bch2_simple_print(struct file *file, char __user *buf,
ret = -ENOMEM;
if (!ret)
- ret = flush_buf(i);
+ ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@@ -963,6 +953,8 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("write_points", 0400, c->fs_debug_dir,
c->btree_debug, &write_points_ops);
+ bch2_fs_async_obj_debugfs_init(c);
+
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;
diff --git a/libbcachefs/debug.h b/libbcachefs/debug.h
index 2c37143b..52dbea73 100644
--- a/libbcachefs/debug.h
+++ b/libbcachefs/debug.h
@@ -19,6 +19,24 @@ static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
}
#ifdef CONFIG_DEBUG_FS
+struct dump_iter {
+ struct bch_fs *c;
+ struct async_obj_list *list;
+ enum btree_id id;
+ struct bpos from;
+ struct bpos prev_node;
+ u64 iter;
+
+ struct printbuf buf;
+
+ char __user *ubuf; /* destination user buffer */
+ size_t size; /* size of requested read */
+ ssize_t ret; /* bytes read so far */
+};
+
+ssize_t bch2_debugfs_flush_buf(struct dump_iter *);
+int bch2_dump_release(struct inode *, struct file *);
+
void bch2_fs_debug_exit(struct bch_fs *);
void bch2_fs_debug_init(struct bch_fs *);
#else
diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c
index 1186280b..79688902 100644
--- a/libbcachefs/disk_groups.c
+++ b/libbcachefs/disk_groups.c
@@ -555,14 +555,12 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
? rcu_dereference(c->devs[t.dev])
: NULL;
- if (ca && percpu_ref_tryget(&ca->io_ref[READ])) {
+ if (ca && ca->disk_sb.bdev)
prt_printf(out, "/dev/%s", ca->name);
- percpu_ref_put(&ca->io_ref[READ]);
- } else if (ca) {
+ else if (ca)
prt_printf(out, "offline device %u", t.dev);
- } else {
+ else
prt_printf(out, "invalid device %u", t.dev);
- }
rcu_read_unlock();
out->atomic--;
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 42600370..1dde0a03 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -16,6 +16,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "io_read.h"
#include "io_write.h"
@@ -706,6 +707,9 @@ static void ec_block_endio(struct bio *bio)
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
int rw = ec_bio->rw;
+ unsigned ref = rw == READ
+ ? BCH_DEV_READ_REF_ec_block
+ : BCH_DEV_WRITE_REF_ec_block;
bch2_account_io_completion(ca, bio_data_dir(bio),
ec_bio->submit_time, !bio->bi_status);
@@ -727,7 +731,7 @@ static void ec_block_endio(struct bio *bio)
}
bio_put(&ec_bio->bio);
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref);
closure_put(cl);
}
@@ -741,8 +745,11 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
? BCH_DATA_user
: BCH_DATA_parity;
int rw = op_is_write(opf);
+ unsigned ref = rw == READ
+ ? BCH_DEV_READ_REF_ec_block
+ : BCH_DEV_WRITE_REF_ec_block;
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref);
if (!ca) {
clear_bit(idx, buf->valid);
return;
@@ -788,14 +795,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
closure_get(cl);
- percpu_ref_get(&ca->io_ref[rw]);
+ enumerated_ref_get(&ca->io_ref[rw], ref);
submit_bio(&ec_bio->bio);
offset += b;
}
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref);
}
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
@@ -1017,14 +1024,14 @@ static void ec_stripe_delete_work(struct work_struct *work)
BCH_TRANS_COMMIT_no_enospc, ({
ec_stripe_delete(trans, lru_k.k->p.offset);
})));
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
void bch2_do_stripe_deletes(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
+ if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_stripe_delete) &&
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
/* stripe creation: */
@@ -1252,7 +1259,8 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
unsigned block,
struct open_bucket *ob)
{
- struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE,
+ BCH_DEV_WRITE_REF_ec_bucket_zero);
if (!ca) {
s->err = -BCH_ERR_erofs_no_writes;
return;
@@ -1268,7 +1276,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
ob->sectors_free,
GFP_KERNEL, 0);
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_ec_bucket_zero);
if (ret)
s->err = ret;
@@ -1418,15 +1426,15 @@ static void ec_stripe_create_work(struct work_struct *work)
while ((s = get_pending_stripe(c)))
ec_stripe_create(s);
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
void bch2_ec_do_stripe_creates(struct bch_fs *c)
{
- bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_get(&c->writes, BCH_WRITE_REF_stripe_create);
if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
diff --git a/libbcachefs/enumerated_ref.c b/libbcachefs/enumerated_ref.c
new file mode 100644
index 00000000..56ab430f
--- /dev/null
+++ b/libbcachefs/enumerated_ref.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "enumerated_ref.h"
+#include "util.h"
+
+#include <linux/completion.h>
+
+#ifdef ENUMERATED_REF_DEBUG
+void enumerated_ref_get(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ atomic_long_inc(&ref->refs[idx]);
+}
+
+bool __enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ return atomic_long_inc_not_zero(&ref->refs[idx]);
+}
+
+bool enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ return !ref->dying &&
+ atomic_long_inc_not_zero(&ref->refs[idx]);
+}
+
+void enumerated_ref_put(struct enumerated_ref *ref, unsigned idx)
+{
+ BUG_ON(idx >= ref->nr);
+ long v = atomic_long_dec_return(&ref->refs[idx]);
+
+ BUG_ON(v < 0);
+ if (v)
+ return;
+
+ for (unsigned i = 0; i < ref->nr; i++)
+ if (atomic_long_read(&ref->refs[i]))
+ return;
+
+ if (ref->stop_fn)
+ ref->stop_fn(ref);
+ complete(&ref->stop_complete);
+}
+#endif
+
+#ifndef ENUMERATED_REF_DEBUG
+static void enumerated_ref_kill_cb(struct percpu_ref *percpu_ref)
+{
+ struct enumerated_ref *ref =
+ container_of(percpu_ref, struct enumerated_ref, ref);
+
+ if (ref->stop_fn)
+ ref->stop_fn(ref);
+ complete(&ref->stop_complete);
+}
+#endif
+
+void enumerated_ref_stop_async(struct enumerated_ref *ref)
+{
+ reinit_completion(&ref->stop_complete);
+
+#ifndef ENUMERATED_REF_DEBUG
+ percpu_ref_kill(&ref->ref);
+#else
+ ref->dying = true;
+ for (unsigned i = 0; i < ref->nr; i++)
+ enumerated_ref_put(ref, i);
+#endif
+}
+
+void enumerated_ref_stop(struct enumerated_ref *ref,
+ const char * const names[])
+{
+ enumerated_ref_stop_async(ref);
+ while (!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n");
+ prt_str(&buf, "Outstanding refs:\n");
+ enumerated_ref_to_text(&buf, ref, names);
+ printk(KERN_ERR "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+void enumerated_ref_start(struct enumerated_ref *ref)
+{
+#ifndef ENUMERATED_REF_DEBUG
+ percpu_ref_reinit(&ref->ref);
+#else
+ ref->dying = false;
+ for (unsigned i = 0; i < ref->nr; i++) {
+ BUG_ON(atomic_long_read(&ref->refs[i]));
+ atomic_long_inc(&ref->refs[i]);
+ }
+#endif
+}
+
+void enumerated_ref_exit(struct enumerated_ref *ref)
+{
+#ifndef ENUMERATED_REF_DEBUG
+ percpu_ref_exit(&ref->ref);
+#else
+ kfree(ref->refs);
+ ref->refs = NULL;
+ ref->nr = 0;
+#endif
+}
+
+int enumerated_ref_init(struct enumerated_ref *ref, unsigned nr,
+ void (*stop_fn)(struct enumerated_ref *))
+{
+ init_completion(&ref->stop_complete);
+ ref->stop_fn = stop_fn;
+
+#ifndef ENUMERATED_REF_DEBUG
+ return percpu_ref_init(&ref->ref, enumerated_ref_kill_cb,
+ PERCPU_REF_INIT_DEAD, GFP_KERNEL);
+#else
+ ref->refs = kzalloc(sizeof(ref->refs[0]) * nr, GFP_KERNEL);
+ if (!ref->refs)
+ return -ENOMEM;
+
+ ref->nr = nr;
+ return 0;
+#endif
+}
+
+void enumerated_ref_to_text(struct printbuf *out,
+ struct enumerated_ref *ref,
+ const char * const names[])
+{
+#ifdef ENUMERATED_REF_DEBUG
+ bch2_printbuf_tabstop_push(out, 32);
+
+ for (unsigned i = 0; i < ref->nr; i++)
+ prt_printf(out, "%s\t%li\n", names[i],
+ atomic_long_read(&ref->refs[i]));
+#else
+ prt_str(out, "(not in debug mode)\n");
+#endif
+}
diff --git a/libbcachefs/enumerated_ref.h b/libbcachefs/enumerated_ref.h
new file mode 100644
index 00000000..ec01cf59
--- /dev/null
+++ b/libbcachefs/enumerated_ref.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ENUMERATED_REF_H
+#define _BCACHEFS_ENUMERATED_REF_H
+
+#include "enumerated_ref_types.h"
+
+/*
+ * A refcount where the users are enumerated: in debug mode, we create sepate
+ * refcounts for each user, to make leaks and refcount errors easy to track
+ * down:
+ */
+
+#ifdef ENUMERATED_REF_DEBUG
+void enumerated_ref_get(struct enumerated_ref *, unsigned);
+bool __enumerated_ref_tryget(struct enumerated_ref *, unsigned);
+bool enumerated_ref_tryget(struct enumerated_ref *, unsigned);
+void enumerated_ref_put(struct enumerated_ref *, unsigned);
+#else
+
+static inline void enumerated_ref_get(struct enumerated_ref *ref, unsigned idx)
+{
+ percpu_ref_get(&ref->ref);
+}
+
+static inline bool __enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ return percpu_ref_tryget(&ref->ref);
+}
+
+static inline bool enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
+{
+ return percpu_ref_tryget_live(&ref->ref);
+}
+
+static inline void enumerated_ref_put(struct enumerated_ref *ref, unsigned idx)
+{
+ percpu_ref_put(&ref->ref);
+}
+#endif
+
+static inline bool enumerated_ref_is_zero(struct enumerated_ref *ref)
+{
+#ifndef ENUMERATED_REF_DEBUG
+ return percpu_ref_is_zero(&ref->ref);
+#else
+ for (unsigned i = 0; i < ref->nr; i++)
+ if (atomic_long_read(&ref->refs[i]))
+ return false;
+ return true;
+#endif
+}
+
+void enumerated_ref_stop_async(struct enumerated_ref *);
+void enumerated_ref_stop(struct enumerated_ref *, const char * const[]);
+void enumerated_ref_start(struct enumerated_ref *);
+
+void enumerated_ref_exit(struct enumerated_ref *);
+int enumerated_ref_init(struct enumerated_ref *, unsigned,
+ void (*stop_fn)(struct enumerated_ref *));
+
+struct printbuf;
+void enumerated_ref_to_text(struct printbuf *,
+ struct enumerated_ref *,
+ const char * const[]);
+
+#endif /* _BCACHEFS_ENUMERATED_REF_H */
diff --git a/libbcachefs/enumerated_ref_types.h b/libbcachefs/enumerated_ref_types.h
new file mode 100644
index 00000000..0e6076f4
--- /dev/null
+++ b/libbcachefs/enumerated_ref_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ENUMERATED_REF_TYPES_H
+#define _BCACHEFS_ENUMERATED_REF_TYPES_H
+
+#include <linux/percpu-refcount.h>
+
+struct enumerated_ref {
+#ifdef ENUMERATED_REF_DEBUG
+ unsigned nr;
+ bool dying;
+ atomic_long_t *refs;
+#else
+ struct percpu_ref ref;
+#endif
+ void (*stop_fn)(struct enumerated_ref *);
+ struct completion stop_complete;
+};
+
+#endif /* _BCACHEFS_ENUMERATED_REF_TYPES_H */
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 996cdf62..83ed5a63 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -53,6 +53,7 @@
x(ENOMEM, ENOMEM_dio_write_bioset_init) \
x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \
x(ENOMEM, ENOMEM_promote_table_init) \
+ x(ENOMEM, ENOMEM_async_obj_init) \
x(ENOMEM, ENOMEM_compression_bounce_read_init) \
x(ENOMEM, ENOMEM_compression_bounce_write_init) \
x(ENOMEM, ENOMEM_compression_workspace_init) \
diff --git a/libbcachefs/fast_list.h b/libbcachefs/fast_list.h
new file mode 100644
index 00000000..73c9bf59
--- /dev/null
+++ b/libbcachefs/fast_list.h
@@ -0,0 +1,41 @@
+#ifndef _LINUX_FAST_LIST_H
+#define _LINUX_FAST_LIST_H
+
+#include <linux/generic-radix-tree.h>
+#include <linux/idr.h>
+#include <linux/percpu.h>
+
+struct fast_list_pcpu;
+
+struct fast_list {
+ GENRADIX(void *) items;
+ struct ida slots_allocated;;
+ struct fast_list_pcpu __percpu
+ *buffer;
+};
+
+static inline void *fast_list_iter_peek(struct genradix_iter *iter,
+ struct fast_list *list)
+{
+ void **p;
+ while ((p = genradix_iter_peek(iter, &list->items)) && !*p)
+ genradix_iter_advance(iter, &list->items);
+
+ return p ? *p : NULL;
+}
+
+#define fast_list_for_each_from(_list, _iter, _i, _start) \
+ for (_iter = genradix_iter_init(&(_list)->items, _start); \
+ (_i = fast_list_iter_peek(&(_iter), _list)) != NULL; \
+ genradix_iter_advance(&(_iter), &(_list)->items))
+
+#define fast_list_for_each(_list, _iter, _i) \
+ fast_list_for_each_from(_list, _iter, _i, 0)
+
+int fast_list_get_idx(struct fast_list *l);
+int fast_list_add(struct fast_list *l, void *item);
+void fast_list_remove(struct fast_list *l, unsigned idx);
+void fast_list_exit(struct fast_list *l);
+int fast_list_init(struct fast_list *l);
+
+#endif /* _LINUX_FAST_LIST_H */
diff --git a/libbcachefs/fs-io-direct.c b/libbcachefs/fs-io-direct.c
index 535bc5fc..1f5154d9 100644
--- a/libbcachefs/fs-io-direct.c
+++ b/libbcachefs/fs-io-direct.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "enumerated_ref.h"
#include "fs.h"
#include "fs-io.h"
#include "fs-io-direct.h"
@@ -401,7 +402,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
- bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write);
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
@@ -606,7 +607,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
prefetch(&inode->ei_inode);
prefetch((void *) &inode->ei_inode + 64);
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_dio_write))
return -EROFS;
inode_lock(&inode->v);
@@ -675,7 +676,7 @@ err_put_bio:
bio_put(bio);
inode_dio_end(&inode->v);
err_put_write_ref:
- bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write);
goto out;
}
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 409bba39..801e9cd6 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -7,6 +7,7 @@
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
@@ -48,7 +49,8 @@ static void nocow_flush_endio(struct bio *_bio)
struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio);
closure_put(bio->cl);
- percpu_ref_put(&bio->ca->io_ref[WRITE]);
+ enumerated_ref_put(&bio->ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_nocow_flush);
bio_put(&bio->bio);
}
@@ -71,7 +73,8 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
rcu_read_lock();
ca = rcu_dereference(c->devs[dev]);
- if (ca && !percpu_ref_tryget(&ca->io_ref[WRITE]))
+ if (ca && !enumerated_ref_tryget(&ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_nocow_flush))
ca = NULL;
rcu_read_unlock();
@@ -205,7 +208,7 @@ static int bch2_flush_inode(struct bch_fs *c,
if (c->opts.journal_flush_disabled)
return 0;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fsync))
return -EROFS;
u64 seq;
@@ -213,7 +216,7 @@ static int bch2_flush_inode(struct bch_fs *c,
bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?:
bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?:
bch2_inode_flush_nocow_writes(c, inode);
- bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_fsync);
return ret;
}
@@ -796,7 +799,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
long ret;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fallocate))
return -EROFS;
inode_lock(&inode->v);
@@ -820,7 +823,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
err:
bch2_pagecache_block_put(inode);
inode_unlock(&inode->v);
- bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_fallocate);
return bch2_err_class(ret);
}
diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c
index 14886e1d..a82dfce9 100644
--- a/libbcachefs/fs-ioctl.c
+++ b/libbcachefs/fs-ioctl.c
@@ -21,206 +21,6 @@
#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
-struct flags_set {
- unsigned mask;
- unsigned flags;
-
- unsigned projid;
-
- bool set_projinherit;
- bool projinherit;
-};
-
-static int bch2_inode_flags_set(struct btree_trans *trans,
- struct bch_inode_info *inode,
- struct bch_inode_unpacked *bi,
- void *p)
-{
- struct bch_fs *c = inode->v.i_sb->s_fs_info;
- /*
- * We're relying on btree locking here for exclusion with other ioctl
- * calls - use the flags in the btree (@bi), not inode->i_flags:
- */
- struct flags_set *s = p;
- unsigned newflags = s->flags;
- unsigned oldflags = bi->bi_flags & s->mask;
-
- if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
- !capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
-
- if (!S_ISREG(bi->bi_mode) &&
- !S_ISDIR(bi->bi_mode) &&
- (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
- return -EINVAL;
-
- if ((newflags ^ oldflags) & BCH_INODE_casefolded) {
-#ifdef CONFIG_UNICODE
- int ret = 0;
- /* Not supported on individual files. */
- if (!S_ISDIR(bi->bi_mode))
- return -EOPNOTSUPP;
-
- /*
- * Make sure the dir is empty, as otherwise we'd need to
- * rehash everything and update the dirent keys.
- */
- ret = bch2_empty_dir_trans(trans, inode_inum(inode));
- if (ret < 0)
- return ret;
-
- ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
- if (ret)
- return ret;
-
- bch2_check_set_feature(c, BCH_FEATURE_casefolding);
-#else
- printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
- return -EOPNOTSUPP;
-#endif
- }
-
- if (s->set_projinherit) {
- bi->bi_fields_set &= ~(1 << Inode_opt_project);
- bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
- }
-
- bi->bi_flags &= ~s->mask;
- bi->bi_flags |= newflags;
-
- bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
- return 0;
-}
-
-static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
-{
- unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
-
- return put_user(flags, arg);
-}
-
-static int bch2_ioc_setflags(struct bch_fs *c,
- struct file *file,
- struct bch_inode_info *inode,
- void __user *arg)
-{
- struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
- unsigned uflags;
- int ret;
-
- if (get_user(uflags, (int __user *) arg))
- return -EFAULT;
-
- s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
- if (uflags)
- return -EOPNOTSUPP;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- inode_lock(&inode->v);
- if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
- ret = -EACCES;
- goto setflags_out;
- }
-
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
- bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
- ATTR_CTIME);
- mutex_unlock(&inode->ei_update_lock);
-
-setflags_out:
- inode_unlock(&inode->v);
- mnt_drop_write_file(file);
- return ret;
-}
-
-static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
- struct fsxattr __user *arg)
-{
- struct fsxattr fa = { 0 };
-
- fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
-
- if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
- fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
-
- fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
-
- if (copy_to_user(arg, &fa, sizeof(fa)))
- return -EFAULT;
-
- return 0;
-}
-
-static int fssetxattr_inode_update_fn(struct btree_trans *trans,
- struct bch_inode_info *inode,
- struct bch_inode_unpacked *bi,
- void *p)
-{
- struct flags_set *s = p;
-
- if (s->projid != bi->bi_project) {
- bi->bi_fields_set |= 1U << Inode_opt_project;
- bi->bi_project = s->projid;
- }
-
- return bch2_inode_flags_set(trans, inode, bi, p);
-}
-
-static int bch2_ioc_fssetxattr(struct bch_fs *c,
- struct file *file,
- struct bch_inode_info *inode,
- struct fsxattr __user *arg)
-{
- struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
- struct fsxattr fa;
- int ret;
-
- if (copy_from_user(&fa, arg, sizeof(fa)))
- return -EFAULT;
-
- s.set_projinherit = true;
- s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
- fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
-
- s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
- if (fa.fsx_xflags)
- return -EOPNOTSUPP;
-
- if (fa.fsx_projid >= U32_MAX)
- return -EINVAL;
-
- /*
- * inode fields accessible via the xattr interface are stored with a +1
- * bias, so that 0 means unset:
- */
- s.projid = fa.fsx_projid + 1;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- inode_lock(&inode->v);
- if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
- ret = -EACCES;
- goto err;
- }
-
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
- bch2_set_projid(c, inode, fa.fsx_projid) ?:
- bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
- ATTR_CTIME);
- mutex_unlock(&inode->ei_update_lock);
-err:
- inode_unlock(&inode->v);
- mnt_drop_write_file(file);
- return ret;
-}
-
static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
long ret;
switch (cmd) {
- case FS_IOC_GETFLAGS:
- ret = bch2_ioc_getflags(inode, (int __user *) arg);
- break;
-
- case FS_IOC_SETFLAGS:
- ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
- break;
-
- case FS_IOC_FSGETXATTR:
- ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
- break;
-
- case FS_IOC_FSSETXATTR:
- ret = bch2_ioc_fssetxattr(c, file, inode,
- (void __user *) arg);
- break;
-
case BCHFS_IOC_REINHERIT_ATTRS:
ret = bch2_ioc_reinherit_attrs(c, file, inode,
(void __user *) arg);
diff --git a/libbcachefs/fs-ioctl.h b/libbcachefs/fs-ioctl.h
index ecd3bfdc..a657e499 100644
--- a/libbcachefs/fs-ioctl.h
+++ b/libbcachefs/fs-ioctl.h
@@ -2,81 +2,6 @@
#ifndef _BCACHEFS_FS_IOCTL_H
#define _BCACHEFS_FS_IOCTL_H
-/* Inode flags: */
-
-/* bcachefs inode flags -> vfs inode flags: */
-static const __maybe_unused unsigned bch_flags_to_vfs[] = {
- [__BCH_INODE_sync] = S_SYNC,
- [__BCH_INODE_immutable] = S_IMMUTABLE,
- [__BCH_INODE_append] = S_APPEND,
- [__BCH_INODE_noatime] = S_NOATIME,
- [__BCH_INODE_casefolded] = S_CASEFOLD,
-};
-
-/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
-static const __maybe_unused unsigned bch_flags_to_uflags[] = {
- [__BCH_INODE_sync] = FS_SYNC_FL,
- [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
- [__BCH_INODE_append] = FS_APPEND_FL,
- [__BCH_INODE_nodump] = FS_NODUMP_FL,
- [__BCH_INODE_noatime] = FS_NOATIME_FL,
- [__BCH_INODE_casefolded] = FS_CASEFOLD_FL,
-};
-
-/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
-static const __maybe_unused unsigned bch_flags_to_xflags[] = {
- [__BCH_INODE_sync] = FS_XFLAG_SYNC,
- [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
- [__BCH_INODE_append] = FS_XFLAG_APPEND,
- [__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
- [__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
- //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
-};
-
-#define set_flags(_map, _in, _out) \
-do { \
- unsigned _i; \
- \
- for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
- if ((_in) & (1 << _i)) \
- (_out) |= _map[_i]; \
- else \
- (_out) &= ~_map[_i]; \
-} while (0)
-
-#define map_flags(_map, _in) \
-({ \
- unsigned _out = 0; \
- \
- set_flags(_map, _in, _out); \
- _out; \
-})
-
-#define map_flags_rev(_map, _in) \
-({ \
- unsigned _i, _out = 0; \
- \
- for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
- if ((_in) & _map[_i]) { \
- (_out) |= 1 << _i; \
- (_in) &= ~_map[_i]; \
- } \
- (_out); \
-})
-
-#define map_defined(_map) \
-({ \
- unsigned _in = ~0; \
- \
- map_flags_rev(_map, _in); \
-})
-
-/* Set VFS inode flags from bcachefs inode: */
-static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
-{
- set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
-}
-
long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index b032ec7a..0422dcab 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -33,6 +33,7 @@
#include <linux/backing-dev.h>
#include <linux/exportfs.h>
#include <linux/fiemap.h>
+#include <linux/fileattr.h>
#include <linux/fs_context.h>
#include <linux/module.h>
#include <linux/pagemap.h>
@@ -51,6 +52,22 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,
struct bch_subvolume *);
+/* Set VFS inode flags from bcachefs inode: */
+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
+{
+ static const __maybe_unused unsigned bch_flags_to_vfs[] = {
+ [__BCH_INODE_sync] = S_SYNC,
+ [__BCH_INODE_immutable] = S_IMMUTABLE,
+ [__BCH_INODE_append] = S_APPEND,
+ [__BCH_INODE_noatime] = S_NOATIME,
+ };
+
+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
+
+ if (inode->ei_inode.bi_casefold)
+ inode->v.i_flags |= S_CASEFOLD;
+}
+
void bch2_inode_update_after_write(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@@ -1448,6 +1465,165 @@ static int bch2_open(struct inode *vinode, struct file *file)
return generic_file_open(vinode, file);
}
+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
+static const __maybe_unused unsigned bch_flags_to_uflags[] = {
+ [__BCH_INODE_sync] = FS_SYNC_FL,
+ [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
+ [__BCH_INODE_append] = FS_APPEND_FL,
+ [__BCH_INODE_nodump] = FS_NODUMP_FL,
+ [__BCH_INODE_noatime] = FS_NOATIME_FL,
+};
+
+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
+static const __maybe_unused unsigned bch_flags_to_xflags[] = {
+ [__BCH_INODE_sync] = FS_XFLAG_SYNC,
+ [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
+ [__BCH_INODE_append] = FS_XFLAG_APPEND,
+ [__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
+ [__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
+};
+
+static int bch2_fileattr_get(struct dentry *dentry,
+ struct fileattr *fa)
+{
+ struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
+
+ fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags));
+
+ if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
+ fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
+
+ if (inode->ei_inode.bi_casefold)
+ fa->flags |= FS_CASEFOLD_FL;
+
+ fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ];
+ return 0;
+}
+
+struct flags_set {
+ unsigned mask;
+ unsigned flags;
+ unsigned projid;
+ bool set_project;
+ bool set_casefold;
+ bool casefold;
+};
+
+static int fssetxattr_inode_update_fn(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct bch_fs *c = trans->c;
+ struct flags_set *s = p;
+
+ /*
+ * We're relying on btree locking here for exclusion with other ioctl
+ * calls - use the flags in the btree (@bi), not inode->i_flags:
+ */
+ if (!S_ISREG(bi->bi_mode) &&
+ !S_ISDIR(bi->bi_mode) &&
+ (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags)
+ return -EINVAL;
+
+ if (s->casefold != bi->bi_casefold) {
+#ifdef CONFIG_UNICODE
+ int ret = 0;
+ /* Not supported on individual files. */
+ if (!S_ISDIR(bi->bi_mode))
+ return -EOPNOTSUPP;
+
+ /*
+ * Make sure the dir is empty, as otherwise we'd need to
+ * rehash everything and update the dirent keys.
+ */
+ ret = bch2_empty_dir_trans(trans, inode_inum(inode));
+ if (ret < 0)
+ return ret;
+
+ ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
+ if (ret)
+ return ret;
+
+ bch2_check_set_feature(c, BCH_FEATURE_casefolding);
+
+ bi->bi_casefold = s->casefold;
+ bi->bi_fields_set &= ~BIT(Inode_opt_casefold);
+ bi->bi_fields_set |= s->casefold << Inode_opt_casefold;
+
+#else
+ printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
+ return -EOPNOTSUPP;
+#endif
+ }
+
+ if (s->set_project) {
+ bi->bi_project = s->projid;
+ bi->bi_fields_set |= BIT(Inode_opt_project);
+ }
+
+ bi->bi_flags &= ~s->mask;
+ bi->bi_flags |= s->flags;
+
+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
+ return 0;
+}
+
+static int bch2_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ struct fileattr *fa)
+{
+ struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct flags_set s = {};
+ int ret;
+
+ if (fa->fsx_valid) {
+ fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
+
+ s.mask = map_defined(bch_flags_to_xflags);
+ s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags);
+ if (fa->fsx_xflags)
+ return -EOPNOTSUPP;
+
+ if (fa->fsx_projid >= U32_MAX)
+ return -EINVAL;
+
+ /*
+ * inode fields accessible via the xattr interface are stored with a +1
+ * bias, so that 0 means unset:
+ */
+ if ((inode->ei_inode.bi_project ||
+ fa->fsx_projid) &&
+ inode->ei_inode.bi_project != fa->fsx_projid + 1) {
+ s.projid = fa->fsx_projid + 1;
+ s.set_project = true;
+ }
+ }
+
+ if (fa->flags_valid) {
+ s.mask = map_defined(bch_flags_to_uflags);
+
+ s.set_casefold = true;
+ s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0;
+ fa->flags &= ~FS_CASEFOLD_FL;
+
+ s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags);
+ if (fa->flags)
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
+ (s.set_project
+ ? bch2_set_projid(c, inode, fa->fsx_projid)
+ : 0) ?:
+ bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+ ATTR_CTIME);
+ mutex_unlock(&inode->ei_update_lock);
+ return ret;
+}
+
static const struct file_operations bch_file_operations = {
.open = bch2_open,
.llseek = bch2_llseek,
@@ -1475,6 +1651,8 @@ static const struct inode_operations bch_file_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct inode_operations bch_dir_inode_operations = {
@@ -1495,6 +1673,8 @@ static const struct inode_operations bch_dir_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct file_operations bch_dir_file_operations = {
@@ -1517,6 +1697,8 @@ static const struct inode_operations bch_symlink_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct inode_operations bch_special_inode_operations = {
@@ -1527,6 +1709,8 @@ static const struct inode_operations bch_special_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
+ .fileattr_get = bch2_fileattr_get,
+ .fileattr_set = bch2_fileattr_set,
};
static const struct address_space_operations bch_address_space_operations = {
@@ -2032,12 +2216,14 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
struct bch_fs *c = root->d_sb->s_fs_info;
bool first = true;
- for_each_online_member(c, ca) {
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca) {
if (!first)
seq_putc(seq, ':');
first = false;
seq_puts(seq, ca->disk_sb.sb_name);
}
+ rcu_read_unlock();
return 0;
}
@@ -2235,15 +2421,16 @@ got_sb:
sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
- for_each_online_member(c, ca) {
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca) {
struct block_device *bdev = ca->disk_sb.bdev;
/* XXX: create an anonymous device for multi device filesystems */
sb->s_bdev = bdev;
sb->s_dev = bdev->bd_dev;
- percpu_ref_put(&ca->io_ref[READ]);
break;
}
+ rcu_read_unlock();
c->dev = sb->s_dev;
diff --git a/libbcachefs/inode_format.h b/libbcachefs/inode_format.h
index 117110af..87e193e8 100644
--- a/libbcachefs/inode_format.h
+++ b/libbcachefs/inode_format.h
@@ -103,7 +103,8 @@ struct bch_inode_generation {
x(bi_parent_subvol, 32) \
x(bi_nocow, 8) \
x(bi_depth, 32) \
- x(bi_inodes_32bit, 8)
+ x(bi_inodes_32bit, 8) \
+ x(bi_casefold, 8)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
@@ -117,7 +118,8 @@ struct bch_inode_generation {
x(background_target, 16) \
x(erasure_code, 16) \
x(nocow, 8) \
- x(inodes_32bit, 8)
+ x(inodes_32bit, 8) \
+ x(casefold, 8)
enum inode_opt_id {
#define x(name, ...) \
@@ -137,8 +139,7 @@ enum inode_opt_id {
x(i_sectors_dirty, 6) \
x(unlinked, 7) \
x(backptr_untrusted, 8) \
- x(has_child_snapshot, 9) \
- x(casefolded, 10)
+ x(has_child_snapshot, 9)
/* bits 20+ reserved for packed fields below: */
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index 9a2d5ac5..f23c3bfe 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -9,6 +9,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "async_objs.h"
#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
@@ -17,6 +18,7 @@
#include "data_update.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "io_read.h"
#include "io_misc.h"
@@ -81,18 +83,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
/* Cache promotion on read */
-struct promote_op {
- struct rcu_head rcu;
- u64 start_time;
-
- struct rhash_head hash;
- struct bpos pos;
-
- struct work_struct work;
- struct data_update write;
- struct bio_vec bi_inline_vecs[]; /* must be last */
-};
-
static const struct rhashtable_params bch_promote_params = {
.head_offset = offsetof(struct promote_op, hash),
.key_offset = offsetof(struct promote_op, pos),
@@ -170,9 +160,11 @@ static noinline void promote_free(struct bch_read_bio *rbio)
bch_promote_params);
BUG_ON(ret);
+ async_object_list_del(c, promote, op->list_idx);
+
bch2_data_update_exit(&op->write);
- bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote);
kfree_rcu(op, rcu);
}
@@ -237,7 +229,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
return NULL;
}
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_promote))
return ERR_PTR(-BCH_ERR_nopromote_no_writes);
struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
@@ -255,6 +247,10 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
goto err;
}
+ ret = async_object_list_add(c, promote, op, &op->list_idx);
+ if (ret < 0)
+ goto err_remove_hash;
+
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
writepoint_hashed((unsigned long) current),
&orig->opts,
@@ -266,7 +262,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
* -BCH_ERR_ENOSPC_disk_reservation:
*/
if (ret)
- goto err_remove_hash;
+ goto err_remove_list;
rbio_init_fragment(&op->write.rbio.bio, orig);
op->write.rbio.bounce = true;
@@ -274,6 +270,8 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
op->write.op.end_io = promote_done;
return &op->write.rbio;
+err_remove_list:
+ async_object_list_del(c, promote, op->list_idx);
err_remove_hash:
BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
bch_promote_params));
@@ -282,7 +280,7 @@ err:
/* We may have added to the rhashtable and thus need rcu freeing: */
kfree_rcu(op, rcu);
err_put:
- bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote);
return ERR_PTR(ret);
}
@@ -346,6 +344,18 @@ nopromote:
return NULL;
}
+void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op)
+{
+ if (!op->write.read_done) {
+ prt_printf(out, "parent read: %px\n", op->write.rbio.parent);
+ printbuf_indent_add(out, 2);
+ bch2_read_bio_to_text(out, op->write.rbio.parent);
+ printbuf_indent_sub(out, 2);
+ }
+
+ bch2_data_update_to_text(out, &op->write);
+}
+
/* Read */
static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
@@ -402,7 +412,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
if (rbio->have_ioref) {
struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
}
if (rbio->split) {
@@ -414,6 +424,8 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
else
promote_free(rbio);
} else {
+ async_object_list_del(rbio->c, rbio, rbio->list_idx);
+
if (rbio->bounce)
bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
@@ -1090,7 +1102,8 @@ retry_pick:
goto err;
}
- struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+ BCH_DEV_READ_REF_io_read);
/*
* Stale dirty pointers are treated as IO errors, but @failed isn't
@@ -1104,7 +1117,7 @@ retry_pick:
unlikely(dev_ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
bch2_mark_io_failure(failed, &pick, false);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
goto retry_pick;
}
@@ -1137,7 +1150,8 @@ retry_pick:
*/
if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
if (ca)
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_io_read);
rbio->ret = -BCH_ERR_data_read_buffer_too_small;
goto out_read_done;
}
@@ -1191,6 +1205,8 @@ retry_pick:
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
rbio->bounce = true;
+
+ async_object_list_add(c, rbio, rbio, &rbio->list_idx);
} else if (flags & BCH_READ_must_clone) {
/*
* Have to clone if there were any splits, due to error
@@ -1204,6 +1220,8 @@ retry_pick:
&c->bio_read_split),
orig);
rbio->bio.bi_iter = iter;
+
+ async_object_list_add(c, rbio, rbio, &rbio->list_idx);
} else {
rbio = orig;
rbio->bio.bi_iter = iter;
@@ -1479,6 +1497,41 @@ err:
return ret;
}
+static const char * const bch2_read_bio_flags[] = {
+#define x(n) #n,
+ BCH_READ_FLAGS()
+#undef x
+ NULL
+};
+
+void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
+{
+ u64 now = local_clock();
+ prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0);
+ prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0);
+
+ if (!rbio->split)
+ prt_printf(out, "end_io:\t%ps\n", rbio->end_io);
+ else
+ prt_printf(out, "parent:\t%px\n", rbio->parent);
+
+ prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io);
+
+ prt_printf(out, "promote:\t%u\n", rbio->promote);
+ prt_printf(out, "bounce:\t%u\n", rbio->bounce);
+ prt_printf(out, "split:\t%u\n", rbio->split);
+ prt_printf(out, "have_ioref:\t%u\n", rbio->have_ioref);
+ prt_printf(out, "narrow_crcs:\t%u\n", rbio->narrow_crcs);
+ prt_printf(out, "context:\t%u\n", rbio->context);
+ prt_printf(out, "ret:\t%s\n", bch2_err_str(rbio->ret));
+
+ prt_printf(out, "flags:\t");
+ bch2_prt_bitflags(out, bch2_read_bio_flags, rbio->flags);
+ prt_newline(out);
+
+ bch2_bio_to_text(out, &rbio->bio);
+}
+
void bch2_fs_io_read_exit(struct bch_fs *c)
{
if (c->promote_table.tbl)
diff --git a/libbcachefs/io_read.h b/libbcachefs/io_read.h
index 1a85b092..c08b9c04 100644
--- a/libbcachefs/io_read.h
+++ b/libbcachefs/io_read.h
@@ -4,6 +4,7 @@
#include "bkey_buf.h"
#include "btree_iter.h"
+#include "extents_types.h"
#include "reflink.h"
struct bch_read_bio {
@@ -48,6 +49,9 @@ struct bch_read_bio {
u16 _state;
};
s16 ret;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ unsigned list_idx;
+#endif
struct extent_ptr_decoded pick;
@@ -173,6 +177,9 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
rbio->split = true;
rbio->parent = orig;
rbio->opts = orig->opts;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ rbio->list_idx = 0;
+#endif
return rbio;
}
@@ -190,9 +197,16 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio,
rbio->ret = 0;
rbio->opts = opts;
rbio->bio.bi_end_io = end_io;
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ rbio->list_idx = 0;
+#endif
return rbio;
}
+struct promote_op;
+void bch2_promote_op_to_text(struct printbuf *, struct promote_op *);
+void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *);
+
void bch2_fs_io_read_exit(struct bch_fs *);
int bch2_fs_io_read_init(struct bch_fs *);
diff --git a/libbcachefs/io_write.c b/libbcachefs/io_write.c
index 4dabff3a..f7c5fcbf 100644
--- a/libbcachefs/io_write.c
+++ b/libbcachefs/io_write.c
@@ -15,6 +15,7 @@
#include "compress.h"
#include "debug.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extent_update.h"
#include "inode.h"
@@ -441,6 +442,10 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
struct bch_write_bio *n;
+ unsigned ref_rw = type == BCH_DATA_btree ? READ : WRITE;
+ unsigned ref_idx = type == BCH_DATA_btree
+ ? BCH_DEV_READ_REF_btree_node_write
+ : BCH_DEV_WRITE_REF_io_write;
BUG_ON(c->opts.nochanges);
@@ -452,7 +457,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
*/
struct bch_dev *ca = nocow
? bch2_dev_have_ref(c, ptr->dev)
- : bch2_dev_get_ioref(c, ptr->dev, type == BCH_DATA_btree ? READ : WRITE);
+ : bch2_dev_get_ioref(c, ptr->dev, ref_rw, ref_idx);
if (to_entry(ptr + 1) < ptrs.end) {
n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOFS, &c->replica_set));
@@ -512,7 +517,7 @@ static void bch2_write_done(struct closure *cl)
bch2_disk_reservation_put(c, &op->res);
if (!(op->flags & BCH_WRITE_move))
- bch2_write_ref_put(c, BCH_WRITE_REF_write);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys);
EBUG_ON(cl->parent);
@@ -727,7 +732,8 @@ static void bch2_write_endio(struct bio *bio)
}
if (wbio->have_ioref)
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_io_write);
if (wbio->bounce)
bch2_bio_free_pages_pool(c, bio);
@@ -1324,7 +1330,8 @@ retry:
/* Get iorefs before dropping btree locks: */
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
+ BCH_DEV_WRITE_REF_io_write);
if (unlikely(!ca))
goto err_get_ioref;
@@ -1426,7 +1433,8 @@ err:
return;
err_get_ioref:
darray_for_each(buckets, i)
- percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE]);
+ enumerated_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_io_write);
/* Fall back to COW path: */
goto out;
@@ -1660,7 +1668,7 @@ CLOSURE_CALLBACK(bch2_write)
}
if (!(op->flags & BCH_WRITE_move) &&
- !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
+ !enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
}
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 3694b83a..f2963a6c 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -12,6 +12,7 @@
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
@@ -699,8 +700,10 @@ static unsigned max_dev_latency(struct bch_fs *c)
{
u64 nsecs = 0;
- for_each_rw_member(c, ca)
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca)
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
+ rcu_read_unlock();
return nsecs_to_jiffies(nsecs);
}
@@ -987,11 +990,11 @@ int bch2_journal_meta(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_journal))
return -BCH_ERR_erofs_no_writes;
int ret = __bch2_journal_meta(j);
- bch2_write_ref_put(c, BCH_WRITE_REF_journal);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_journal);
return ret;
}
@@ -1333,13 +1336,14 @@ err:
int bch2_fs_journal_alloc(struct bch_fs *c)
{
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_journal_alloc) {
if (ca->journal.nr)
continue;
int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_fs_journal_alloc);
return ret;
}
}
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 913125df..58e3983d 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -1218,7 +1218,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
out:
bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
kvfree(buf.data);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_journal_read);
closure_return(cl);
return;
err:
@@ -1253,7 +1253,8 @@ int bch2_journal_read(struct bch_fs *c,
if ((ca->mi.state == BCH_MEMBER_STATE_rw ||
ca->mi.state == BCH_MEMBER_STATE_ro) &&
- percpu_ref_tryget(&ca->io_ref[READ]))
+ enumerated_ref_tryget(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_journal_read))
closure_call(&ca->journal.read,
bch2_journal_read_device,
system_unbound_wq,
@@ -1768,7 +1769,7 @@ static void journal_write_endio(struct bio *bio)
}
closure_put(&w->io);
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
}
static CLOSURE_CALLBACK(journal_write_submit)
@@ -1779,7 +1780,8 @@ static CLOSURE_CALLBACK(journal_write_submit)
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
+ BCH_DEV_WRITE_REF_journal_write);
if (!ca) {
/* XXX: fix this */
bch_err(c, "missing device for journal write\n");
@@ -1842,8 +1844,9 @@ static CLOSURE_CALLBACK(journal_write_preflush)
}
if (w->separate_flush) {
- for_each_rw_member(c, ca) {
- percpu_ref_get(&ca->io_ref[WRITE]);
+ for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_write) {
+ enumerated_ref_get(&ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_journal_write);
struct journal_device *ja = &ca->journal;
struct bio *bio = &ja->bio[w->idx]->bio;
@@ -2053,12 +2056,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
- unsigned nr_rw_members = 0;
+ unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_journal]);
int ret;
- for_each_rw_member(c, ca)
- nr_rw_members++;
-
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
BUG_ON(!w->write_started);
BUG_ON(w->write_allocated);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index d1c0fc59..66bfb95f 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -285,7 +285,7 @@ void bch2_journal_do_discards(struct journal *j)
mutex_lock(&j->discard_lock);
- for_each_rw_member(c, ca) {
+ for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) {
struct journal_device *ja = &ca->journal;
while (should_discard_bucket(j, ja)) {
@@ -617,7 +617,8 @@ static u64 journal_seq_to_flush(struct journal *j)
spin_lock(&j->lock);
- for_each_rw_member(c, ca) {
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca) {
struct journal_device *ja = &ca->journal;
unsigned nr_buckets, bucket_to_flush;
@@ -631,6 +632,7 @@ static u64 journal_seq_to_flush(struct journal *j)
seq_to_flush = max(seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
}
+ rcu_read_unlock();
/* Also flush if the pin fifo is more than half full */
seq_to_flush = max_t(s64, seq_to_flush,
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index 159410c5..e97e87eb 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -279,7 +279,8 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
{
s64 wait = S64_MAX, fragmented_allowed, fragmented;
- for_each_rw_member(c, ca) {
+ rcu_read_lock();
+ for_each_rw_member_rcu(c, ca) {
struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca);
struct bch_dev_usage usage;
@@ -296,6 +297,7 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
wait = min(wait, max(0LL, fragmented_allowed - fragmented));
}
+ rcu_read_unlock();
return wait;
}
@@ -356,6 +358,13 @@ static int bch2_copygc_thread(void *arg)
set_freezable();
+ /*
+ * Data move operations can't run until after check_snapshots has
+ * completed, and bch2_snapshot_is_ancestor() is available.
+ */
+ kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+ kthread_should_stop());
+
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
writepoint_ptr(&c->copygc_write_point),
diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c
index 0d65ea96..46f3c8b1 100644
--- a/libbcachefs/namei.c
+++ b/libbcachefs/namei.c
@@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans,
if (ret)
goto err;
- /* Inherit casefold state from parent. */
- if (S_ISDIR(mode))
- new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded;
-
if (!(flags & BCH_CREATE_SNAPSHOT)) {
/* Normal create path - allocate a new inode: */
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 5cdb1880..b8cd0b04 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -229,6 +229,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_ERASURE_CODE, false, \
NULL, "Enable erasure coding (DO NOT USE YET)") \
+ x(casefold, u8, \
+ OPT_FS|OPT_INODE|OPT_FORMAT, \
+ OPT_BOOL(), \
+ BCH_SB_CASEFOLD, false, \
+ NULL, "Dirent lookups are casefolded") \
x(inodes_32bit, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index b0314674..a81ffff4 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -587,6 +587,13 @@ static int bch2_rebalance_thread(void *arg)
set_freezable();
+ /*
+ * Data move operations can't run until after check_snapshots has
+ * completed, and bch2_snapshot_is_ancestor() is available.
+ */
+ kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+ kthread_should_stop());
+
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
writepoint_ptr(&c->rebalance_write_point),
true);
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index df953b64..e36fcab9 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -18,6 +18,7 @@
#include "journal_seq_blacklist.h"
#include "logged_ops.h"
#include "move.h"
+#include "movinggc.h"
#include "namei.h"
#include "quota.h"
#include "rebalance.h"
@@ -1214,6 +1215,9 @@ int bch2_fs_initialize(struct bch_fs *c)
c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
+ bch2_copygc_wakeup(c);
+ bch2_rebalance_wakeup(c);
+
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
if (ret)
diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c
index 7a644408..87150dd3 100644
--- a/libbcachefs/recovery_passes.c
+++ b/libbcachefs/recovery_passes.c
@@ -12,6 +12,7 @@
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
+#include "movinggc.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
@@ -311,49 +312,52 @@ int bch2_run_recovery_passes(struct bch_fs *c)
*/
c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
- c->next_recovery_pass = c->curr_recovery_pass + 1;
+ spin_lock_irq(&c->recovery_pass_lock);
- spin_lock_irq(&c->recovery_pass_lock);
+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
+ unsigned prev_done = c->recovery_pass_done;
unsigned pass = c->curr_recovery_pass;
+ c->next_recovery_pass = pass + 1;
+
if (c->opts.recovery_pass_last &&
- c->curr_recovery_pass > c->opts.recovery_pass_last) {
- spin_unlock_irq(&c->recovery_pass_lock);
+ c->curr_recovery_pass > c->opts.recovery_pass_last)
break;
- }
- if (!should_run_recovery_pass(c, pass)) {
- c->curr_recovery_pass++;
- c->recovery_pass_done = max(c->recovery_pass_done, pass);
+ if (should_run_recovery_pass(c, pass)) {
spin_unlock_irq(&c->recovery_pass_lock);
- continue;
- }
- spin_unlock_irq(&c->recovery_pass_lock);
-
- ret = bch2_run_recovery_pass(c, pass) ?:
- bch2_journal_flush(&c->journal);
-
- if (!ret && !test_bit(BCH_FS_error, &c->flags))
- bch2_clear_recovery_pass_required(c, pass);
-
- spin_lock_irq(&c->recovery_pass_lock);
- if (c->next_recovery_pass < c->curr_recovery_pass) {
- /*
- * bch2_run_explicit_recovery_pass() was called: we
- * can't always catch -BCH_ERR_restart_recovery because
- * it may have been called from another thread (btree
- * node read completion)
- */
- ret = 0;
- c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
- } else {
- c->recovery_passes_complete |= BIT_ULL(pass);
- c->recovery_pass_done = max(c->recovery_pass_done, pass);
+ ret = bch2_run_recovery_pass(c, pass) ?:
+ bch2_journal_flush(&c->journal);
+
+ if (!ret && !test_bit(BCH_FS_error, &c->flags))
+ bch2_clear_recovery_pass_required(c, pass);
+ spin_lock_irq(&c->recovery_pass_lock);
+
+ if (c->next_recovery_pass < c->curr_recovery_pass) {
+ /*
+ * bch2_run_explicit_recovery_pass() was called: we
+ * can't always catch -BCH_ERR_restart_recovery because
+ * it may have been called from another thread (btree
+ * node read completion)
+ */
+ ret = 0;
+ c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
+ } else {
+ c->recovery_passes_complete |= BIT_ULL(pass);
+ c->recovery_pass_done = max(c->recovery_pass_done, pass);
+ }
}
+
c->curr_recovery_pass = c->next_recovery_pass;
- spin_unlock_irq(&c->recovery_pass_lock);
+
+ if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
+ c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
+ bch2_copygc_wakeup(c);
+ bch2_rebalance_wakeup(c);
+ }
}
+ spin_unlock_irq(&c->recovery_pass_lock);
+
return ret;
}
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 710178e3..3a13dbca 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -3,6 +3,7 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "inode.h"
@@ -610,7 +611,7 @@ s64 bch2_remap_range(struct bch_fs *c,
!bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts);
int ret = 0, ret2 = 0;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_reflink))
return -BCH_ERR_erofs_no_writes;
bch2_check_set_feature(c, BCH_FEATURE_reflink);
@@ -761,7 +762,7 @@ err:
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
- bch2_write_ref_put(c, BCH_WRITE_REF_reflink);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_reflink);
return dst_done ?: ret ?: ret2;
}
diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h
index e2a8d03a..0337e34d 100644
--- a/libbcachefs/sb-members.h
+++ b/libbcachefs/sb-members.h
@@ -4,6 +4,7 @@
#include "darray.h"
#include "bkey_types.h"
+#include "enumerated_ref.h"
extern char * const bch2_member_error_strs[];
@@ -20,7 +21,7 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
static inline bool bch2_dev_is_online(struct bch_dev *ca)
{
- return !percpu_ref_is_zero(&ca->io_ref[READ]);
+ return !enumerated_ref_is_zero(&ca->io_ref[READ]);
}
static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
@@ -104,6 +105,12 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *
for (struct bch_dev *_ca = NULL; \
(_ca = __bch2_next_dev((_c), _ca, (_mask)));)
+#define for_each_online_member_rcu(_c, _ca) \
+ for_each_member_device_rcu(_c, _ca, &(_c)->online_devs)
+
+#define for_each_rw_member_rcu(_c, _ca) \
+ for_each_member_device_rcu(_c, _ca, &(_c)->rw_devs[BCH_DATA_free])
+
static inline void bch2_dev_get(struct bch_dev *ca)
{
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -157,33 +164,33 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev
static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
struct bch_dev *ca,
unsigned state_mask,
- int rw)
+ int rw, unsigned ref_idx)
{
rcu_read_lock();
if (ca)
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref_idx);
while ((ca = __bch2_next_dev(c, ca, NULL)) &&
(!((1 << ca->mi.state) & state_mask) ||
- !percpu_ref_tryget(&ca->io_ref[rw])))
+ !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx)))
;
rcu_read_unlock();
return ca;
}
-#define __for_each_online_member(_c, _ca, state_mask, rw) \
+#define __for_each_online_member(_c, _ca, state_mask, rw, ref_idx) \
for (struct bch_dev *_ca = NULL; \
- (_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw));)
+ (_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw, ref_idx));)
-#define for_each_online_member(c, ca) \
- __for_each_online_member(c, ca, ~0, READ)
+#define for_each_online_member(c, ca, ref_idx) \
+ __for_each_online_member(c, ca, ~0, READ, ref_idx)
-#define for_each_rw_member(c, ca) \
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE)
+#define for_each_rw_member(c, ca, ref_idx) \
+ __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE, ref_idx)
-#define for_each_readable_member(c, ca) \
- __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ)
+#define for_each_readable_member(c, ca, ref_idx) \
+ __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ, ref_idx)
static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev)
{
@@ -284,13 +291,14 @@ static inline struct bch_dev *bch2_dev_iterate(struct bch_fs *c, struct bch_dev
return bch2_dev_tryget(c, dev_idx);
}
-static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, int rw)
+static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
+ int rw, unsigned ref_idx)
{
might_sleep();
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu(c, dev);
- if (ca && !percpu_ref_tryget(&ca->io_ref[rw]))
+ if (ca && !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx))
ca = NULL;
rcu_read_unlock();
@@ -300,21 +308,10 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
return ca;
if (ca)
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref_idx);
return NULL;
}
-/* XXX kill, move to struct bch_fs */
-static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
-{
- struct bch_devs_mask devs;
-
- memset(&devs, 0, sizeof(devs));
- for_each_online_member(c, ca)
- __set_bit(ca->dev_idx, devs.d);
- return devs;
-}
-
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1;
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 2eede851..14ea09cc 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -6,6 +6,7 @@
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@@ -1661,18 +1662,18 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work)
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
bch2_delete_dead_snapshots(c);
- bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots))
return;
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index 09a354a2..a67ee4ad 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -48,7 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
- .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL,
+ .cf_encoding = bi->bi_casefold ? c->cf_encoding : NULL,
#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 1b9fb60c..39376c87 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@@ -517,7 +518,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
darray_exit(&s);
}
- bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
}
struct subvolume_unlink_hook {
@@ -540,11 +541,11 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans
if (ret)
return ret;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache))
return -EROFS;
if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
return 0;
}
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index bc2721b9..d53cbc5f 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -260,11 +260,11 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
/* XXX: we're not checking that offline device have enough space */
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_field_resize) {
struct bch_sb_handle *dev_sb = &ca->disk_sb;
if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_sb_field_resize);
return NULL;
}
}
@@ -967,7 +967,7 @@ static void write_super_endio(struct bio *bio)
}
closure_put(&ca->fs->sb_write);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
}
static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
@@ -985,7 +985,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio));
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
closure_bio_submit(bio, &c->sb_write);
}
@@ -1011,7 +1011,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
bio_sectors(bio));
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
closure_bio_submit(bio, &c->sb_write);
}
@@ -1043,13 +1043,13 @@ int bch2_write_super(struct bch_fs *c)
* For now, we expect to be able to call write_super() when we're not
* yet RW:
*/
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_write_super) {
ret = darray_push(&online_devices, ca);
if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
goto out;
}
- percpu_ref_get(&ca->io_ref[READ]);
+ enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
}
/* Make sure we're using the new magic numbers: */
@@ -1108,7 +1108,8 @@ int bch2_write_super(struct bch_fs *c)
prt_str(&buf, ")");
bch2_fs_fatal_error(c, ": %s", buf.buf);
printbuf_exit(&buf);
- return -BCH_ERR_sb_not_downgraded;
+ ret = -BCH_ERR_sb_not_downgraded;
+ goto out;
}
darray_for_each(online_devices, ca) {
@@ -1215,7 +1216,7 @@ out:
/* Make new options visible after they're persistent: */
bch2_sb_update(c);
darray_for_each(online_devices, ca)
- percpu_ref_put(&(*ca)->io_ref[READ]);
+ enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super);
darray_exit(&online_devices);
printbuf_exit(&err);
return ret;
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 4632230f..f861583c 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -10,6 +10,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "async_objs.h"
#include "bkey_sort.h"
#include "btree_cache.h"
#include "btree_gc.h"
@@ -28,6 +29,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@@ -77,13 +79,28 @@ MODULE_DESCRIPTION("bcachefs filesystem");
typedef DARRAY(struct bch_sb_handle) bch_sb_handles;
-const char * const bch2_fs_flag_strs[] = {
#define x(n) #n,
+const char * const bch2_fs_flag_strs[] = {
BCH_FS_FLAGS()
-#undef x
NULL
};
+const char * const bch2_write_refs[] = {
+ BCH_WRITE_REFS()
+ NULL
+};
+
+const char * const bch2_dev_read_refs[] = {
+ BCH_DEV_READ_REFS()
+ NULL
+};
+
+const char * const bch2_dev_write_refs[] = {
+ BCH_DEV_WRITE_REFS()
+ NULL
+};
+#undef x
+
static void __bch2_print_str(struct bch_fs *c, const char *prefix,
const char *str, bool nonblocking)
{
@@ -311,15 +328,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
}
}
-#ifndef BCH_WRITE_REF_DEBUG
-static void bch2_writes_disabled(struct percpu_ref *writes)
+static void bch2_writes_disabled(struct enumerated_ref *writes)
{
struct bch_fs *c = container_of(writes, struct bch_fs, writes);
set_bit(BCH_FS_write_disable_complete, &c->flags);
wake_up(&bch2_read_only_wait);
}
-#endif
void bch2_fs_read_only(struct bch_fs *c)
{
@@ -337,12 +352,7 @@ void bch2_fs_read_only(struct bch_fs *c)
* writes will return -EROFS:
*/
set_bit(BCH_FS_going_ro, &c->flags);
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_kill(&c->writes);
-#else
- for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
- bch2_write_ref_put(c, i);
-#endif
+ enumerated_ref_stop_async(&c->writes);
/*
* If we're not doing an emergency shutdown, we want to wait on
@@ -432,32 +442,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
return ret;
}
-static int bch2_fs_read_write_late(struct bch_fs *c)
-{
- int ret;
-
- /*
- * Data move operations can't run until after check_snapshots has
- * completed, and bch2_snapshot_is_ancestor() is available.
- *
- * Ideally we'd start copygc/rebalance earlier instead of waiting for
- * all of recovery/fsck to complete:
- */
- ret = bch2_copygc_start(c);
- if (ret) {
- bch_err(c, "error starting copygc thread");
- return ret;
- }
-
- ret = bch2_rebalance_start(c);
- if (ret) {
- bch_err(c, "error starting rebalance thread");
- return ret;
- }
-
- return 0;
-}
-
static int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
int ret;
@@ -492,10 +476,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
clear_bit(BCH_FS_clean_shutdown, &c->flags);
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
- bch2_dev_allocator_add(c, ca);
- percpu_ref_reinit(&ca->io_ref[WRITE]);
- }
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
+ if (ca->mi.state == BCH_MEMBER_STATE_rw) {
+ bch2_dev_allocator_add(c, ca);
+ enumerated_ref_start(&ca->io_ref[WRITE]);
+ }
+ rcu_read_unlock();
+
bch2_recalc_capacity(c);
/*
@@ -521,18 +509,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(BCH_FS_rw, &c->flags);
set_bit(BCH_FS_was_rw, &c->flags);
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_reinit(&c->writes);
-#else
- for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) {
- BUG_ON(atomic_long_read(&c->writes[i]));
- atomic_long_inc(&c->writes[i]);
+ enumerated_ref_start(&c->writes);
+
+ ret = bch2_copygc_start(c);
+ if (ret) {
+ bch_err_msg(c, ret, "error starting copygc thread");
+ goto err;
}
-#endif
- if (!early) {
- ret = bch2_fs_read_write_late(c);
- if (ret)
- goto err;
+
+ ret = bch2_rebalance_start(c);
+ if (ret) {
+ bch_err_msg(c, ret, "error starting rebalance thread");
+ goto err;
}
bch2_do_discards(c);
@@ -583,6 +571,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_free_pending_node_rewrites(c);
bch2_free_fsck_errs(c);
bch2_fs_accounting_exit(c);
+ bch2_fs_async_obj_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
@@ -625,9 +614,7 @@ static void __bch2_fs_free(struct bch_fs *c)
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->fill_iter);
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_exit(&c->writes);
-#endif
+ enumerated_ref_exit(&c->writes);
kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table);
@@ -666,6 +653,12 @@ void __bch2_fs_stop(struct bch_fs *c)
bch2_fs_read_only(c);
up_write(&c->state_lock);
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+ struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
+ if (ca)
+ bch2_dev_io_ref_stop(ca, READ);
+ }
+
for_each_member_device(c, ca)
bch2_dev_unlink(ca);
@@ -694,8 +687,6 @@ void __bch2_fs_stop(struct bch_fs *c)
void bch2_fs_free(struct bch_fs *c)
{
- unsigned i;
-
mutex_lock(&bch_fs_list_lock);
list_del(&c->list);
mutex_unlock(&bch_fs_list_lock);
@@ -703,7 +694,7 @@ void bch2_fs_free(struct bch_fs *c)
closure_sync(&c->cl);
closure_debug_destroy(&c->cl);
- for (i = 0; i < c->sb.nr_devices; i++) {
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
if (ca) {
@@ -974,10 +965,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
if (!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_init(&c->writes, bch2_writes_disabled,
- PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
-#endif
+ enumerated_ref_init(&c->writes, BCH_WRITE_REF_NR,
+ bch2_writes_disabled) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->btree_bio, 1,
max(offsetof(struct btree_read_bio, bio),
@@ -994,6 +983,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
}
ret =
+ bch2_fs_async_obj_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
@@ -1107,14 +1097,20 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
}
- for_each_online_member(c, ca)
- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
+ cpu_to_le64(now);
+ rcu_read_unlock();
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- for_each_rw_member(c, ca)
- bch2_dev_allocator_add(c, ca);
+ rcu_read_lock();
+ for_each_online_member_rcu(c, ca)
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ bch2_dev_allocator_add(c, ca);
+ rcu_read_unlock();
bch2_recalc_capacity(c);
up_write(&c->state_lock);
@@ -1140,13 +1136,10 @@ int bch2_fs_start(struct bch_fs *c)
wake_up(&c->ro_ref_wait);
down_write(&c->state_lock);
- if (c->opts.read_only) {
+ if (c->opts.read_only)
bch2_fs_read_only(c);
- } else {
- ret = !test_bit(BCH_FS_rw, &c->flags)
- ? bch2_fs_read_write(c)
- : bch2_fs_read_write_late(c);
- }
+ else if (!test_bit(BCH_FS_rw, &c->flags))
+ ret = bch2_fs_read_write(c);
up_write(&c->state_lock);
err:
@@ -1264,11 +1257,14 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw)
{
- if (!percpu_ref_is_zero(&ca->io_ref[rw])) {
- reinit_completion(&ca->io_ref_completion[rw]);
- percpu_ref_kill(&ca->io_ref[rw]);
- wait_for_completion(&ca->io_ref_completion[rw]);
- }
+ if (rw == READ)
+ clear_bit(ca->dev_idx, ca->fs->online_devs.d);
+
+ if (!enumerated_ref_is_zero(&ca->io_ref[rw]))
+ enumerated_ref_stop(&ca->io_ref[rw],
+ rw == READ
+ ? bch2_dev_read_refs
+ : bch2_dev_write_refs);
}
static void bch2_dev_release(struct kobject *kobj)
@@ -1280,8 +1276,8 @@ static void bch2_dev_release(struct kobject *kobj)
static void bch2_dev_free(struct bch_dev *ca)
{
- WARN_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
- WARN_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
+ WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE]));
+ WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[READ]));
cancel_work_sync(&ca->io_error_work);
@@ -1301,8 +1297,8 @@ static void bch2_dev_free(struct bch_dev *ca)
bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]);
bch2_time_stats_quantiles_exit(&ca->io_latency[READ]);
- percpu_ref_exit(&ca->io_ref[WRITE]);
- percpu_ref_exit(&ca->io_ref[READ]);
+ enumerated_ref_exit(&ca->io_ref[WRITE]);
+ enumerated_ref_exit(&ca->io_ref[READ]);
#ifndef CONFIG_BCACHEFS_DEBUG
percpu_ref_exit(&ca->ref);
#endif
@@ -1314,7 +1310,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
lockdep_assert_held(&c->state_lock);
- if (percpu_ref_is_zero(&ca->io_ref[READ]))
+ if (enumerated_ref_is_zero(&ca->io_ref[READ]))
return;
__bch2_dev_read_only(c, ca);
@@ -1336,20 +1332,6 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref)
}
#endif
-static void bch2_dev_io_ref_read_complete(struct percpu_ref *ref)
-{
- struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[READ]);
-
- complete(&ca->io_ref_completion[READ]);
-}
-
-static void bch2_dev_io_ref_write_complete(struct percpu_ref *ref)
-{
- struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[WRITE]);
-
- complete(&ca->io_ref_completion[WRITE]);
-}
-
static void bch2_dev_unlink(struct bch_dev *ca)
{
struct kobject *b;
@@ -1411,8 +1393,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
kobject_init(&ca->kobj, &bch2_dev_ktype);
init_completion(&ca->ref_completion);
- init_completion(&ca->io_ref_completion[READ]);
- init_completion(&ca->io_ref_completion[WRITE]);
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
@@ -1438,10 +1418,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
bch2_dev_allocator_background_init(ca);
- if (percpu_ref_init(&ca->io_ref[READ], bch2_dev_io_ref_read_complete,
- PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
- percpu_ref_init(&ca->io_ref[WRITE], bch2_dev_io_ref_write_complete,
- PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+ if (enumerated_ref_init(&ca->io_ref[READ], BCH_DEV_READ_REF_NR, NULL) ||
+ enumerated_ref_init(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_NR, NULL) ||
!(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) ||
bch2_dev_buckets_alloc(c, ca) ||
!(ca->io_done = alloc_percpu(*ca->io_done)))
@@ -1503,8 +1481,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
return -BCH_ERR_device_size_too_small;
}
- BUG_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
- BUG_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
+ BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[READ]));
+ BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE]));
ret = bch2_dev_journal_init(ca, sb->sb);
if (ret)
@@ -1523,7 +1501,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
ca->dev = ca->disk_sb.bdev->bd_dev;
- percpu_ref_reinit(&ca->io_ref[READ]);
+ enumerated_ref_start(&ca->io_ref[READ]);
return 0;
}
@@ -1547,6 +1525,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
if (ret)
return ret;
+ set_bit(ca->dev_idx, c->online_devs.d);
+
bch2_dev_sysfs_online(c, ca);
struct printbuf name = PRINTBUF;
@@ -1604,7 +1584,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
return true;
/* do we have enough devices to read from? */
- new_online_devs = bch2_online_devs(c);
+ new_online_devs = c->online_devs;
__clear_bit(ca->dev_idx, new_online_devs.d);
return bch2_have_enough_devs(c, new_online_devs, flags, false);
@@ -1644,7 +1624,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
break;
}
- return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
+ return bch2_have_enough_devs(c, c->online_devs, flags, true);
}
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
@@ -1668,8 +1648,8 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
- if (percpu_ref_is_zero(&ca->io_ref[WRITE]))
- percpu_ref_reinit(&ca->io_ref[WRITE]);
+ if (enumerated_ref_is_zero(&ca->io_ref[WRITE]))
+ enumerated_ref_start(&ca->io_ref[WRITE]);
bch2_dev_do_discards(ca);
}
@@ -1819,7 +1799,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
err:
if (test_bit(BCH_FS_rw, &c->flags) &&
ca->mi.state == BCH_MEMBER_STATE_rw &&
- !percpu_ref_is_zero(&ca->io_ref[READ]))
+ !enumerated_ref_is_zero(&ca->io_ref[READ]))
__bch2_dev_read_write(c, ca);
up_write(&c->state_lock);
return ret;
@@ -2120,7 +2100,7 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
{
down_write(&c->state_lock);
- for_each_online_member(c, ca) {
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
u64 old_nbuckets = ca->mi.nbuckets;
u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
ca->mi.bucket_size);
@@ -2131,7 +2111,8 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
int ret = bch2_dev_buckets_resize(c, ca, new_nbuckets);
bch_err_fn(ca, ret);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_fs_resize_on_mount);
up_write(&c->state_lock);
return ret;
}
@@ -2149,7 +2130,8 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
if (ca->mi.freespace_initialized) {
ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets);
if (ret) {
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ],
+ BCH_DEV_READ_REF_fs_resize_on_mount);
up_write(&c->state_lock);
return ret;
}
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
index 502d6c57..dbf59547 100644
--- a/libbcachefs/super.h
+++ b/libbcachefs/super.h
@@ -9,6 +9,9 @@
#include <linux/math64.h>
extern const char * const bch2_fs_flag_strs[];
+extern const char * const bch2_write_refs[];
+extern const char * const bch2_dev_read_refs[];
+extern const char * const bch2_dev_write_refs[];
struct bch_fs *bch2_dev_to_fs(dev_t);
struct bch_fs *bch2_uuid_to_fs(__uuid_t);
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 455c6ae9..7c840b47 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -25,6 +25,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "inode.h"
#include "journal.h"
#include "journal_reclaim.h"
@@ -178,25 +179,9 @@ read_attribute(open_buckets);
read_attribute(open_buckets_partial);
read_attribute(nocow_lock_table);
-#ifdef BCH_WRITE_REF_DEBUG
+read_attribute(read_refs);
read_attribute(write_refs);
-static const char * const bch2_write_refs[] = {
-#define x(n) #n,
- BCH_WRITE_REFS()
-#undef x
- NULL
-};
-
-static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
-{
- bch2_printbuf_tabstop_push(out, 24);
-
- for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++)
- prt_printf(out, "%s\t%li\n", bch2_write_refs[i], atomic_long_read(&c->writes[i]));
-}
-#endif
-
read_attribute(internal_uuid);
read_attribute(disk_groups);
@@ -324,7 +309,7 @@ static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca)
bch2_time_stats_init_no_pcpu(&stats_fua);
bch2_time_stats_init_no_pcpu(&stats_random);
- if (!bch2_dev_get_ioref(c, ca->dev_idx, READ)) {
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, READ, BCH_DEV_READ_REF_read_fua_test)) {
prt_str(out, "offline\n");
return 0;
}
@@ -415,7 +400,7 @@ static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca)
err:
kfree(buf);
kfree(bio);
- percpu_ref_put(&ca->io_ref[READ]);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_read_fua_test);
bch_err_fn(c, ret);
return ret;
}
@@ -481,10 +466,8 @@ SHOW(bch2_fs)
if (attr == &sysfs_moving_ctxts)
bch2_fs_moving_ctxts_to_text(out, c);
-#ifdef BCH_WRITE_REF_DEBUG
if (attr == &sysfs_write_refs)
- bch2_write_refs_to_text(out, c);
-#endif
+ enumerated_ref_to_text(out, &c->writes, bch2_write_refs);
if (attr == &sysfs_nocow_lock_table)
bch2_nocow_locks_to_text(out, &c->nocow_locks);
@@ -517,7 +500,7 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_btree_updates)
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_trigger_btree_cache_shrink) {
@@ -577,7 +560,7 @@ STORE(bch2_fs)
size = ret;
}
#endif
- bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs);
return size;
}
SYSFS_OPS(bch2_fs);
@@ -670,9 +653,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_new_stripes,
&sysfs_open_buckets,
&sysfs_open_buckets_partial,
-#ifdef BCH_WRITE_REF_DEBUG
&sysfs_write_refs,
-#endif
&sysfs_nocow_lock_table,
&sysfs_io_timers_read,
&sysfs_io_timers_write,
@@ -738,7 +719,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
* We don't need to take c->writes for correctness, but it eliminates an
* unsightly error message in the dmesg log when we're RO:
*/
- if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
+ if (unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs)))
return -EROFS;
char *tmp = kstrdup(buf, GFP_KERNEL);
@@ -765,7 +746,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
ret = size;
err:
- bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs);
return ret;
}
@@ -919,6 +900,12 @@ SHOW(bch2_dev)
if (opt_id >= 0)
return sysfs_opt_show(c, ca, opt_id, out);
+ if (attr == &sysfs_read_refs)
+ enumerated_ref_to_text(out, &ca->io_ref[READ], bch2_dev_read_refs);
+
+ if (attr == &sysfs_write_refs)
+ enumerated_ref_to_text(out, &ca->io_ref[WRITE], bch2_dev_write_refs);
+
return 0;
}
@@ -976,6 +963,9 @@ struct attribute *bch2_dev_files[] = {
/* debug: */
&sysfs_alloc_debug,
&sysfs_open_buckets,
+
+ &sysfs_read_refs,
+ &sysfs_write_refs,
NULL
};
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 6e5d7fc2..7e6ebe8c 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -715,6 +715,16 @@ void bch2_corrupt_bio(struct bio *bio)
}
#endif
+void bch2_bio_to_text(struct printbuf *out, struct bio *bio)
+{
+ prt_printf(out, "bi_remaining:\t%u\n",
+ atomic_read(&bio->__bi_remaining));
+ prt_printf(out, "bi_end_io:\t%ps\n",
+ bio->bi_end_io);
+ prt_printf(out, "bi_status:\t%u\n",
+ bio->bi_status);
+}
+
#if 0
void eytzinger1_test(void)
{
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index fa742af9..7a93e187 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -419,6 +419,8 @@ static inline void bch2_maybe_corrupt_bio(struct bio *bio, unsigned ratio)
#define bch2_maybe_corrupt_bio(...) do {} while (0)
#endif
+void bch2_bio_to_text(struct printbuf *, struct bio *);
+
static inline void memcpy_u64s_small(void *dst, const void *src,
unsigned u64s)
{
@@ -739,4 +741,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len)
*--dst = *src++;
}
+#define set_flags(_map, _in, _out) \
+do { \
+ unsigned _i; \
+ \
+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
+ if ((_in) & (1 << _i)) \
+ (_out) |= _map[_i]; \
+ else \
+ (_out) &= ~_map[_i]; \
+} while (0)
+
+#define map_flags(_map, _in) \
+({ \
+ unsigned _out = 0; \
+ \
+ set_flags(_map, _in, _out); \
+ _out; \
+})
+
+#define map_flags_rev(_map, _in) \
+({ \
+ unsigned _i, _out = 0; \
+ \
+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
+ if ((_in) & _map[_i]) { \
+ (_out) |= 1 << _i; \
+ (_in) &= ~_map[_i]; \
+ } \
+ (_out); \
+})
+
+#define map_defined(_map) \
+({ \
+ unsigned _in = ~0; \
+ \
+ map_flags_rev(_map, _in); \
+})
+
#endif /* _BCACHEFS_UTIL_H */
diff --git a/linux/wait.c b/linux/wait.c
index b1f002b9..e5fe2cb9 100644
--- a/linux/wait.c
+++ b/linux/wait.c
@@ -248,3 +248,28 @@ void wait_for_completion(struct completion *x)
out:
spin_unlock_irq(&x->wait.lock);
}
+
+unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout)
+{
+ spin_lock_irq(&x->wait.lock);
+
+ if (!x->done) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ do {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&x->wait.lock);
+
+ timeout = schedule_timeout(timeout);
+ spin_lock_irq(&x->wait.lock);
+ } while (!x->done);
+ __remove_wait_queue(&x->wait, &wait);
+ if (!x->done)
+ goto out;
+ }
+ x->done--;
+out:
+ spin_unlock_irq(&x->wait.lock);
+ return timeout;
+}