Diffstat (limited to 'libbcachefs')
-rw-r--r--  libbcachefs/alloc_background.c | 18
-rw-r--r--  libbcachefs/alloc_background.h | 9
-rw-r--r--  libbcachefs/alloc_foreground.c | 2
-rw-r--r--  libbcachefs/backpointers.c | 2
-rw-r--r--  libbcachefs/bcachefs.h | 12
-rw-r--r--  libbcachefs/btree_cache.c | 19
-rw-r--r--  libbcachefs/btree_cache.h | 11
-rw-r--r--  libbcachefs/btree_gc.c | 31
-rw-r--r--  libbcachefs/btree_io.c | 57
-rw-r--r--  libbcachefs/btree_iter.c | 88
-rw-r--r--  libbcachefs/btree_iter.h | 14
-rw-r--r--  libbcachefs/btree_journal_iter.c | 37
-rw-r--r--  libbcachefs/btree_key_cache.c | 15
-rw-r--r--  libbcachefs/btree_locking.c | 12
-rw-r--r--  libbcachefs/btree_node_scan.c | 6
-rw-r--r--  libbcachefs/btree_node_scan.h | 2
-rw-r--r--  libbcachefs/btree_trans_commit.c | 10
-rw-r--r--  libbcachefs/btree_types.h | 8
-rw-r--r--  libbcachefs/btree_update.c | 107
-rw-r--r--  libbcachefs/btree_update.h | 12
-rw-r--r--  libbcachefs/btree_update_interior.c | 21
-rw-r--r--  libbcachefs/btree_write_buffer.c | 5
-rw-r--r--  libbcachefs/btree_write_buffer.h | 6
-rw-r--r--  libbcachefs/data_update.c | 13
-rw-r--r--  libbcachefs/dirent.c | 18
-rw-r--r--  libbcachefs/ec.c | 4
-rw-r--r--  libbcachefs/errcode.h | 1
-rw-r--r--  libbcachefs/error.c | 18
-rw-r--r--  libbcachefs/fs-io-buffered.c | 10
-rw-r--r--  libbcachefs/fs.c | 28
-rw-r--r--  libbcachefs/fsck.c | 90
-rw-r--r--  libbcachefs/inode.c | 61
-rw-r--r--  libbcachefs/io_misc.c | 27
-rw-r--r--  libbcachefs/io_misc.h | 2
-rw-r--r--  libbcachefs/io_read.c | 13
-rw-r--r--  libbcachefs/journal.c | 16
-rw-r--r--  libbcachefs/journal_io.c | 53
-rw-r--r--  libbcachefs/journal_io.h | 7
-rw-r--r--  libbcachefs/journal_seq_blacklist.c | 46
-rw-r--r--  libbcachefs/journal_seq_blacklist.h | 3
-rw-r--r--  libbcachefs/namei.c | 2
-rw-r--r--  libbcachefs/opts.h | 16
-rw-r--r--  libbcachefs/rebalance.c | 2
-rw-r--r--  libbcachefs/recovery.c | 3
-rw-r--r--  libbcachefs/recovery_passes.c | 13
-rw-r--r--  libbcachefs/reflink.c | 16
-rw-r--r--  libbcachefs/sb-errors_format.h | 9
-rw-r--r--  libbcachefs/sb-members.c | 29
-rw-r--r--  libbcachefs/sb-members.h | 1
-rw-r--r--  libbcachefs/snapshot.c | 6
-rw-r--r--  libbcachefs/snapshot.h | 2
-rw-r--r--  libbcachefs/str_hash.c | 6
-rw-r--r--  libbcachefs/str_hash.h | 6
-rw-r--r--  libbcachefs/subvolume.c | 4
-rw-r--r--  libbcachefs/super-io.c | 5
-rw-r--r--  libbcachefs/super.c | 22
-rw-r--r--  libbcachefs/sysfs.c | 117
-rw-r--r--  libbcachefs/trace.h | 132
-rw-r--r--  libbcachefs/util.c | 20
59 files changed, 873 insertions, 452 deletions
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 66de4631..d64839c7 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -1381,7 +1381,7 @@ static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct
u8 gen;
ret = k.k->type != KEY_TYPE_set
- ? bch2_check_discard_freespace_key(trans, &iter, &gen, false)
+ ? __bch2_check_discard_freespace_key(trans, &iter, &gen, FSCK_ERR_SILENT)
: 0;
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -1397,8 +1397,8 @@ static void check_discard_freespace_key_work(struct work_struct *work)
kfree(w);
}
-int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen,
- bool async_repair)
+int __bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen,
+ enum bch_fsck_flags fsck_flags)
{
struct bch_fs *c = trans->c;
enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
@@ -1406,8 +1406,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
: BCH_DATA_free;
struct printbuf buf = PRINTBUF;
- unsigned fsck_flags = (async_repair ? FSCK_ERR_NO_LOG : 0)|
- FSCK_CAN_FIX|FSCK_CAN_IGNORE;
+ bool async_repair = fsck_flags & FSCK_ERR_NO_LOG;
+ fsck_flags |= FSCK_CAN_FIX|FSCK_CAN_IGNORE;
struct bpos bucket = iter->pos;
bucket.offset &= ~(~0ULL << 56);
@@ -1490,10 +1490,10 @@ delete:
}
}
-static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter)
+static int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter)
{
u8 gen;
- int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false);
+ int ret = __bch2_check_discard_freespace_key(trans, iter, &gen, 0);
return ret < 0 ? ret : 0;
}
@@ -1651,7 +1651,7 @@ bkey_err:
ret = for_each_btree_key(trans, iter,
BTREE_ID_need_discard, POS_MIN,
BTREE_ITER_prefetch, k,
- bch2_check_discard_freespace_key_fsck(trans, &iter));
+ bch2_check_discard_freespace_key(trans, &iter));
if (ret)
goto err;
@@ -1664,7 +1664,7 @@ bkey_err:
break;
ret = bkey_err(k) ?:
- bch2_check_discard_freespace_key_fsck(trans, &iter);
+ bch2_check_discard_freespace_key(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 0cc5adc5..c2e8482f 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -309,7 +309,14 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
enum btree_iter_update_trigger_flags);
-int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool);
+int __bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *,
+ enum bch_fsck_flags);
+
+static inline int bch2_check_discard_freespace_key_async(struct btree_trans *trans, struct btree_iter *iter, u8 *gen)
+{
+ return __bch2_check_discard_freespace_key(trans, iter, gen, FSCK_ERR_NO_LOG);
+}
+
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_dev_do_discards(struct bch_dev *);
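The alloc_background change above replaces the bare "bool async_repair" argument with enum bch_fsck_flags plus a small inline wrapper that names the async case, so call sites document themselves. A minimal sketch of that refactoring pattern, using hypothetical flag and function names rather than the bcachefs API:

#include <stdbool.h>
#include <stdio.h>

/* hypothetical flags standing in for enum bch_fsck_flags */
enum check_flags {
	CHECK_ERR_NO_LOG = 1 << 0,	/* silent: used for async repair */
	CHECK_CAN_FIX    = 1 << 1,
	CHECK_CAN_IGNORE = 1 << 2,
};

/* core helper takes flags instead of an opaque bool parameter */
static int __check_key(unsigned flags)
{
	bool async_repair = flags & CHECK_ERR_NO_LOG;

	flags |= CHECK_CAN_FIX|CHECK_CAN_IGNORE;
	printf("flags=0x%x async=%d\n", flags, (int) async_repair);
	return 0;
}

/* named wrapper keeps the async call site readable */
static inline int check_key_async(void)
{
	return __check_key(CHECK_ERR_NO_LOG);
}

int main(void)
{
	__check_key(0);			/* fsck path: report errors */
	return check_key_async();	/* allocator path: silent */
}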
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index b375ad61..23a9fbb3 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -269,7 +269,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans,
return NULL;
u8 gen;
- int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true);
+ int ret = bch2_check_discard_freespace_key_async(trans, freespace_iter, &gen);
if (ret < 0)
return ERR_PTR(ret);
if (ret)
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index e76809e7..77d93beb 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -353,7 +353,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
} else {
struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit);
- if (b == ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node)))
+ if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
return bkey_s_c_null;
if (IS_ERR_OR_NULL(b))
return ((struct bkey_s_c) { .k = ERR_CAST(b) });
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index ac99a8ec..fb3156ed 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -819,6 +819,7 @@ struct bch_fs {
struct work_struct read_only_work;
struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
+ struct bch_devs_mask devs_removed;
struct bch_accounting_mem accounting;
@@ -863,9 +864,7 @@ struct bch_fs {
DARRAY(enum bcachefs_metadata_version)
incompat_versions_requested;
-#if IS_ENABLED(CONFIG_UNICODE)
struct unicode_map *cf_encoding;
-#endif
struct bch_sb_handle disk_sb;
@@ -1285,4 +1284,13 @@ static inline bool bch2_discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca
: ca->mi.discard;
}
+static inline int bch2_fs_casefold_enabled(struct bch_fs *c)
+{
+ if (!IS_ENABLED(CONFIG_UNICODE))
+ return bch_err_throw(c, no_casefolding_without_utf8);
+ if (c->opts.casefold_disabled)
+ return bch_err_throw(c, casefolding_disabled);
+ return 0;
+}
+
#endif /* _BCACHEFS_H */
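The new bch2_fs_casefold_enabled() above folds the scattered "#if IS_ENABLED(CONFIG_UNICODE)" blocks into one helper, so both branches always compile and callers simply test the returned error. A user-space sketch of the same shape, with a stand-in for the config check and plain errno values instead of the bcachefs error codes:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* stand-in for IS_ENABLED(CONFIG_UNICODE); set to 1 to simulate UTF-8 support */
#define UNICODE_ENABLED 0

struct fs_opts { bool casefold_disabled; };

static int fs_casefold_enabled(const struct fs_opts *opts)
{
	if (!UNICODE_ENABLED)
		return -EOPNOTSUPP;	/* cf. no_casefolding_without_utf8 */
	if (opts->casefold_disabled)
		return -EOPNOTSUPP;	/* cf. casefolding_disabled */
	return 0;
}

int main(void)
{
	struct fs_opts opts = { .casefold_disabled = false };
	int ret = fs_casefold_enabled(&opts);

	if (ret)
		fprintf(stderr, "casefolding unavailable: %d\n", ret);
	return 0;
}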
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 91e0aa79..a3631a90 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -15,6 +15,7 @@
#include <linux/prefetch.h>
#include <linux/sched/mm.h>
+#include <linux/seq_buf.h>
#include <linux/swap.h>
const char * const bch2_btree_node_flags[] = {
@@ -444,7 +445,8 @@ retry_unlocked:
}
if (b->hash_val && !ret)
- trace_and_count(c, btree_cache_reap, c, b);
+ trace_btree_node(c, b, btree_cache_reap);
+
return 0;
}
@@ -575,6 +577,19 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
return btree_cache_can_free(list);
}
+static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
+{
+ struct btree_cache_list *list = shrink->private_data;
+ struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
+
+ char *cbuf;
+ size_t buflen = seq_buf_get_buf(s, &cbuf);
+ struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
+
+ bch2_btree_cache_to_text(&out, bc);
+ seq_buf_commit(s, out.pos);
+}
+
void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
@@ -666,6 +681,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
bc->live[0].shrink = shrink;
shrink->count_objects = bch2_btree_cache_count;
shrink->scan_objects = bch2_btree_cache_scan;
+ shrink->to_text = bch2_btree_cache_shrinker_to_text;
shrink->seeks = 2;
shrink->private_data = &bc->live[0];
shrinker_register(shrink);
@@ -676,6 +692,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
bc->live[1].shrink = shrink;
shrink->count_objects = bch2_btree_cache_count;
shrink->scan_objects = bch2_btree_cache_scan;
+ shrink->to_text = bch2_btree_cache_shrinker_to_text;
shrink->seeks = 8;
shrink->private_data = &bc->live[1];
shrinker_register(shrink);
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index ca3c1b14..3264801c 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -153,4 +153,15 @@ void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btr
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
+#define trace_btree_node(_c, _b, event) \
+do { \
+ if (trace_##event##_enabled()) { \
+ CLASS(printbuf, buf)(); \
+ printbuf_indent_add(&buf, 2); \
+ bch2_btree_pos_to_text(&buf, _c, _b); \
+ trace_##event(_c, buf.buf); \
+ } \
+ count_event(_c, event); \
+} while (0)
+
#endif /* _BCACHEFS_BTREE_CACHE_H */
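The trace_btree_node() macro above formats the btree position only when the tracepoint is live, keeping the printbuf work off the fast path while still bumping the event counter unconditionally. A self-contained sketch of that gate-before-format pattern, with stand-ins for the tracepoint helpers:

#include <stdbool.h>
#include <stdio.h>

/* stand-ins for trace_##event##_enabled() and trace_##event() */
static bool trace_node_event_enabled(void) { return false; }
static void trace_node_event(const char *msg) { puts(msg); }

struct node { int level; long long pos; };

static void node_pos_to_text(char *buf, size_t len, const struct node *n)
{
	snprintf(buf, len, "level %d pos %lld", n->level, n->pos);
}

#define trace_node(_n, _counter)					\
do {									\
	if (trace_node_event_enabled()) {				\
		char _buf[64];						\
		/* expensive formatting only when someone listens */	\
		node_pos_to_text(_buf, sizeof(_buf), (_n));		\
		trace_node_event(_buf);					\
	}								\
	(*(_counter))++;	/* cheap counter is always bumped */	\
} while (0)

int main(void)
{
	struct node n = { .level = 1, .pos = 42 };
	unsigned long count = 0;

	trace_node(&n, &count);
	printf("events: %lu\n", count);
	return 0;
}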
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 697c6ecc..7269490a 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -534,32 +534,39 @@ fsck_err:
return ret;
}
-static int bch2_check_root(struct btree_trans *trans, enum btree_id i,
+static int bch2_check_root(struct btree_trans *trans, enum btree_id btree,
bool *reconstructed_root)
{
struct bch_fs *c = trans->c;
- struct btree_root *r = bch2_btree_id_root(c, i);
+ struct btree_root *r = bch2_btree_id_root(c, btree);
struct printbuf buf = PRINTBUF;
int ret = 0;
- bch2_btree_id_to_text(&buf, i);
+ bch2_btree_id_to_text(&buf, btree);
if (r->error) {
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
- r->alive = false;
- r->error = 0;
+ ret = bch2_btree_has_scanned_nodes(c, btree);
+ if (ret < 0)
+ goto err;
- if (!bch2_btree_has_scanned_nodes(c, i)) {
+ if (!ret) {
__fsck_err(trans,
- FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0),
+ FSCK_CAN_FIX|(!btree_id_important(btree) ? FSCK_AUTOFIX : 0),
btree_root_unreadable_and_scan_found_nothing,
"no nodes found for btree %s, continue?", buf.buf);
- bch2_btree_root_alloc_fake_trans(trans, i, 0);
+
+ r->alive = false;
+ r->error = 0;
+ bch2_btree_root_alloc_fake_trans(trans, btree, 0);
} else {
- bch2_btree_root_alloc_fake_trans(trans, i, 1);
- bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
- ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
+ r->alive = false;
+ r->error = 0;
+ bch2_btree_root_alloc_fake_trans(trans, btree, 1);
+
+ bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX);
if (ret)
goto err;
}
@@ -686,7 +693,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (ret)
goto out;
- if (trans->nr_updates) {
+ if (bch2_trans_has_updates(trans)) {
ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 08b22bdd..84e302af 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1337,15 +1337,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
- scoped_guard(rcu)
- bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
- struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
-
- if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
- set_btree_node_need_rewrite(b);
- set_btree_node_need_rewrite_degraded(b);
+ /*
+ * XXX:
+ *
+ * We deadlock if too many btree updates require node rewrites while
+ * we're still in journal replay.
+ *
+ * This is because btree node rewrites generate more updates for the
+ * interior updates (alloc, backpointers), and if those updates touch
+ * new nodes and generate more rewrites - well, you see the problem.
+ *
+ * The biggest cause is that we don't use the btree write buffer (for
+ * the backpointer updates - this needs some real thought on locking in
+ * order to fix.
+ *
+ * The problem with this workaround (not doing the rewrite for degraded
+ * nodes in journal replay) is that those degraded nodes persist, and we
+ * don't want that (this is a real bug when a btree node write completes
+ * with fewer replicas than we wanted and leaves a degraded node due to
+ * device _removal_, i.e. the device went away mid write).
+ *
+ * It's less of a bug here, but still a problem because we don't yet
+ * have a way of tracking degraded data - we need another index (all
+ * extents/btree nodes, by replicas entry) in order to fix properly
+ * (re-replicate degraded data at the earliest possible time).
+ */
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) {
+ scoped_guard(rcu)
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
+ struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
+
+ if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
+ set_btree_node_need_rewrite(b);
+ set_btree_node_need_rewrite_degraded(b);
+ }
}
- }
+ }
if (!ptr_written) {
set_btree_node_need_rewrite(b);
@@ -1771,7 +1798,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
struct bio *bio;
int ret;
- trace_and_count(c, btree_node_read, trans, b);
+ trace_btree_node(c, b, btree_node_read);
if (static_branch_unlikely(&bch2_verify_all_btree_replicas) &&
!btree_node_read_all_replicas(c, b, sync))
@@ -2505,7 +2532,17 @@ do_write:
c->opts.nochanges)
goto err;
- trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write);
+ if (trace_btree_node_write_enabled()) {
+ CLASS(printbuf, buf)();
+ printbuf_indent_add(&buf, 2);
+ prt_printf(&buf, "offset %u sectors %u bytes %u\n",
+ b->written,
+ sectors_to_write,
+ bytes_to_write);
+ bch2_btree_pos_to_text(&buf, c, b);
+ trace_btree_node_write(c, buf.buf);
+ }
+ count_event(c, btree_node_write);
wbio = container_of(bio_alloc_bioset(NULL,
buf_pages(data, sectors_to_write << 9),
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 96697d5c..74639468 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -645,6 +645,7 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str
trans_for_each_update(trans, i)
if (!i->cached &&
+ !i->key_cache_flushing &&
i->level == b->c.level &&
i->btree_id == b->c.btree_id &&
bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
@@ -2189,7 +2190,7 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans,
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
bch2_btree_journal_peek_prev(trans, iter, search_key,
- k->k ? k->k->p : path_l(path)->b->key.k.p);
+ k->k ? k->k->p : path_l(path)->b->data->min_key);
if (next_journal) {
iter->k = next_journal->k;
@@ -2288,6 +2289,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct
if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
k.k &&
+ !bkey_deleted(k.k) &&
(k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
k = k2;
if (bkey_err(k)) {
@@ -2580,6 +2582,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st
if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
k.k &&
+ !bkey_deleted(k.k) &&
(k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
k = k2;
if (bkey_err(k2)) {
@@ -2795,7 +2798,7 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *trans, struct btree_ite
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btree_iter *iter)
{
struct bpos search_key;
- struct bkey_s_c k;
+ struct bkey_s_c k, k2;
int ret;
bch2_trans_verify_not_unlocked_or_in_restart(trans);
@@ -2854,18 +2857,18 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
(k = btree_trans_peek_slot_journal(trans, iter)).k)
goto out;
+ k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k);
+ if (unlikely(!k.k))
+ goto out;
+
if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
- (k = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) {
+ !bkey_deleted(k.k) &&
+ (k2 = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) {
+ k = k2;
if (!bkey_err(k))
iter->k = *k.k;
- /* We're not returning a key from iter->path: */
- goto out;
}
- k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k);
- if (unlikely(!k.k))
- goto out;
-
if (unlikely(k.k->type == KEY_TYPE_whiteout &&
(iter->flags & BTREE_ITER_filter_snapshots) &&
!(iter->flags & BTREE_ITER_key_cache_fill)))
@@ -3238,32 +3241,30 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
}
EBUG_ON(trans->mem);
+ EBUG_ON(trans->mem_bytes);
+ EBUG_ON(trans->mem_top);
+ EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX);
+
+ bool lock_dropped = false;
+ new_mem = allocate_dropping_locks_norelock(trans, lock_dropped, kmalloc(new_bytes, _gfp));
+ if (!new_mem) {
+ new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
+ new_bytes = BTREE_TRANS_MEM_MAX;
+ trans->used_mempool = true;
+ }
- new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
- if (unlikely(!new_mem)) {
- bch2_trans_unlock(trans);
-
- new_mem = kmalloc(new_bytes, GFP_KERNEL);
- if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
- new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
- new_bytes = BTREE_TRANS_MEM_MAX;
- trans->used_mempool = true;
- }
-
- EBUG_ON(!new_mem);
+ EBUG_ON(!new_mem);
- trans->mem = new_mem;
- trans->mem_bytes = new_bytes;
+ trans->mem = new_mem;
+ trans->mem_bytes = new_bytes;
+ if (unlikely(lock_dropped)) {
ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);
}
- trans->mem = new_mem;
- trans->mem_bytes = new_bytes;
-
- p = trans->mem + trans->mem_top;
+ p = trans->mem;
trans->mem_top += size;
memset(p, 0, size);
return p;
@@ -3324,22 +3325,25 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->mem_top = 0;
if (unlikely(trans->restarted == BCH_ERR_transaction_restart_mem_realloced)) {
- EBUG_ON(!trans->mem || !trans->mem_bytes);
unsigned new_bytes = trans->realloc_bytes_required;
- void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
- if (unlikely(!new_mem)) {
- bch2_trans_unlock(trans);
- new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
-
- EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX);
-
- if (!new_mem) {
- new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
- new_bytes = BTREE_TRANS_MEM_MAX;
- trans->used_mempool = true;
- kfree(trans->mem);
- }
- }
+ EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX);
+ EBUG_ON(!trans->mem);
+ EBUG_ON(!trans->mem_bytes);
+
+ bool lock_dropped = false;
+ void *new_mem = allocate_dropping_locks_norelock(trans, lock_dropped,
+ krealloc(trans->mem, new_bytes, _gfp));
+ (void)lock_dropped;
+
+ if (!new_mem) {
+ new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
+ new_bytes = BTREE_TRANS_MEM_MAX;
+ trans->used_mempool = true;
+ kfree(trans->mem);
+ }
+
+ EBUG_ON(!new_mem);
+
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
}
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 09dd3e52..cc2c6bb6 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -963,6 +963,20 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *,
_p; \
})
+#define allocate_dropping_locks_norelock(_trans, _lock_dropped, _do) \
+({ \
+ gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \
+ typeof(_do) _p = _do; \
+ _lock_dropped = false; \
+ if (unlikely(!_p)) { \
+ bch2_trans_unlock(_trans); \
+ _lock_dropped = true; \
+ _gfp = GFP_KERNEL; \
+ _p = _do; \
+ } \
+ _p; \
+})
+
struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
void bch2_trans_put(struct btree_trans *);
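allocate_dropping_locks_norelock() above tries the allocation with GFP_NOWAIT while locks are held and, only on failure, drops the transaction locks and retries with GFP_KERNEL, reporting through _lock_dropped that the caller must relock. A minimal user-space sketch of the pattern, assuming a toy transaction lock and allocator in place of the btree transaction and kmalloc():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-ins for GFP flags and a flag-aware allocator */
enum gfp { GFP_NOWAIT, GFP_KERNEL };

static void *try_alloc(size_t size, enum gfp gfp)
{
	/* pretend the non-blocking attempt always fails */
	return gfp == GFP_NOWAIT ? NULL : malloc(size);
}

struct trans { bool locked; };
static void trans_unlock(struct trans *t) { t->locked = false; }
static void trans_lock(struct trans *t)   { t->locked = true; }

/*
 * Evaluate _do (which must reference _gfp) non-blocking first; on
 * failure drop the locks, switch to a sleeping allocation, and report
 * via _lock_dropped that the caller has to relock and revalidate.
 */
#define alloc_dropping_locks(_t, _lock_dropped, _do)		\
({								\
	enum gfp _gfp = GFP_NOWAIT;				\
	typeof(_do) _p = (_do);					\
	(_lock_dropped) = false;				\
	if (!_p) {						\
		trans_unlock(_t);				\
		(_lock_dropped) = true;				\
		_gfp = GFP_KERNEL;				\
		_p = (_do);					\
	}							\
	_p;							\
})

int main(void)
{
	struct trans t = { .locked = true };
	bool dropped;

	void *p = alloc_dropping_locks(&t, dropped, try_alloc(128, _gfp));
	if (dropped)
		trans_lock(&t);	/* caller is responsible for relocking */

	printf("p=%p lock_dropped=%d\n", p, (int) dropped);
	free(p);
	return 0;
}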
diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c
index a41fabd0..341d31b3 100644
--- a/libbcachefs/btree_journal_iter.c
+++ b/libbcachefs/btree_journal_iter.c
@@ -137,12 +137,15 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b
struct journal_key *k;
BUG_ON(*idx > keys->nr);
+
+ if (!keys->nr)
+ return NULL;
search:
if (!*idx)
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
while (*idx < keys->nr &&
- __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) >= 0) {
+ __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) {
(*idx)++;
iters++;
if (iters == 10) {
@@ -151,18 +154,23 @@ search:
}
}
+ if (*idx == keys->nr)
+ --(*idx);
+
struct bkey_i *ret = NULL;
rcu_read_lock(); /* for overwritten_ranges */
- while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
+ while (true) {
+ k = idx_to_key(keys, *idx);
if (__journal_key_cmp(btree_id, level, end_pos, k) > 0)
break;
if (k->overwritten) {
if (k->overwritten_range)
- *idx = rcu_dereference(k->overwritten_range)->start - 1;
- else
- *idx -= 1;
+ *idx = rcu_dereference(k->overwritten_range)->start;
+ if (!*idx)
+ break;
+ --(*idx);
continue;
}
@@ -171,6 +179,8 @@ search:
break;
}
+ if (!*idx)
+ break;
--(*idx);
iters++;
if (iters == 10) {
@@ -707,6 +717,18 @@ static void __journal_keys_sort(struct journal_keys *keys)
keys->nr = dst - keys->data;
}
+static bool should_rewind_entry(struct bch_fs *c, struct jset_entry *entry)
+{
+ if (entry->level)
+ return false;
+ if (btree_id_is_alloc(entry->btree_id))
+ return false;
+ if (c->opts.journal_rewind_no_extents &&
+ entry->btree_id == BTREE_ID_extents)
+ return false;
+ return true;
+}
+
int bch2_journal_keys_sort(struct bch_fs *c)
{
struct genradix_iter iter;
@@ -725,9 +747,8 @@ int bch2_journal_keys_sort(struct bch_fs *c)
cond_resched();
vstruct_for_each(&i->j, entry) {
- bool rewind = !entry->level &&
- !btree_id_is_alloc(entry->btree_id) &&
- le64_to_cpu(i->j.seq) >= rewind_seq;
+ bool rewind = le64_to_cpu(i->j.seq) >= rewind_seq &&
+ should_rewind_entry(c, entry);
if (entry->type != (rewind
? BCH_JSET_ENTRY_overwrite
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index d96188b9..19d1bb80 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -13,6 +13,7 @@
#include "trace.h"
#include <linux/sched/mm.h>
+#include <linux/seq_buf.h>
static inline bool btree_uses_pcpu_readers(enum btree_id id)
{
@@ -580,6 +581,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
bool kick_reclaim = false;
BUG_ON(insert->k.u64s > ck->u64s);
+ BUG_ON(bkey_deleted(&insert->k));
bkey_copy(ck->k, insert);
@@ -815,6 +817,18 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
{
}
+static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
+{
+ struct bch_fs *c = shrink->private_data;
+ struct btree_key_cache *bc = &c->btree_key_cache;
+ char *cbuf;
+ size_t buflen = seq_buf_get_buf(s, &cbuf);
+ struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
+
+ bch2_btree_key_cache_to_text(&out, bc);
+ seq_buf_commit(s, out.pos);
+}
+
int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
{
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
@@ -839,6 +853,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
bc->shrink = shrink;
shrink->count_objects = bch2_btree_key_cache_count;
shrink->scan_objects = bch2_btree_key_cache_scan;
+ shrink->to_text = bch2_btree_key_cache_shrinker_to_text;
shrink->batch = 1 << 14;
shrink->seeks = 0;
shrink->private_data = c;
diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index 91a51aef..bed2b4b6 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -771,7 +771,7 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans)
}
static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path,
- struct get_locks_fail *f, bool trace)
+ struct get_locks_fail *f, bool trace, ulong ip)
{
if (!trace)
goto out;
@@ -796,7 +796,7 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st
prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
}
- trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
+ trace_trans_restart_relock(trans, ip, buf.buf);
printbuf_exit(&buf);
}
@@ -806,7 +806,7 @@ out:
bch2_trans_verify_locks(trans);
}
-static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
+static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace, ulong ip)
{
bch2_trans_verify_locks(trans);
@@ -825,7 +825,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
if (path->should_be_locked &&
(ret = btree_path_get_locks(trans, path, false, &f,
BCH_ERR_transaction_restart_relock))) {
- bch2_trans_relock_fail(trans, path, &f, trace);
+ bch2_trans_relock_fail(trans, path, &f, trace, ip);
return ret;
}
}
@@ -838,12 +838,12 @@ out:
int bch2_trans_relock(struct btree_trans *trans)
{
- return __bch2_trans_relock(trans, true);
+ return __bch2_trans_relock(trans, true, _RET_IP_);
}
int bch2_trans_relock_notrace(struct btree_trans *trans)
{
- return __bch2_trans_relock(trans, false);
+ return __bch2_trans_relock(trans, false, _RET_IP_);
}
void bch2_trans_unlock(struct btree_trans *trans)
diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c
index a3584773..23d8c62e 100644
--- a/libbcachefs/btree_node_scan.c
+++ b/libbcachefs/btree_node_scan.c
@@ -521,8 +521,12 @@ bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
return false;
}
-bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
{
+ int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ return ret;
+
struct found_btree_node search = {
.btree_id = btree,
.level = 0,
diff --git a/libbcachefs/btree_node_scan.h b/libbcachefs/btree_node_scan.h
index 08687b20..66e6f9ed 100644
--- a/libbcachefs/btree_node_scan.h
+++ b/libbcachefs/btree_node_scan.h
@@ -4,7 +4,7 @@
int bch2_scan_for_btree_nodes(struct bch_fs *);
bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
-bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
+int bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
void bch2_find_btree_nodes_exit(struct find_btree_nodes *);
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 639ef75b..7fcf248a 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -46,6 +46,9 @@ void bch2_trans_commit_flags_to_text(struct printbuf *out, enum bch_trans_commit
static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
{
#ifdef CONFIG_BCACHEFS_DEBUG
+ if (i->key_cache_flushing)
+ return;
+
struct bch_fs *c = trans->c;
struct bkey u;
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u);
@@ -337,6 +340,9 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
BUG_ON(!bpos_eq(i->k->k.p, path->pos));
BUG_ON(i->cached != path->cached);
+ BUG_ON(i->cached &&
+ !i->key_cache_already_flushed &&
+ bkey_deleted(&i->k->k));
BUG_ON(i->level != path->level);
BUG_ON(i->btree_id != path->btree_id);
BUG_ON(i->bkey_type != __btree_node_type(path->level, path->btree_id));
@@ -1015,9 +1021,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (unlikely(ret))
goto out_reset;
- if (!trans->nr_updates &&
- !trans->journal_entries.u64s &&
- !trans->accounting.u64s)
+ if (!bch2_trans_has_updates(trans))
goto out_reset;
ret = bch2_trans_commit_run_triggers(trans);
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 112170fd..76adf756 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -422,14 +422,16 @@ struct btree_insert_entry {
u8 sort_order;
u8 bkey_type;
enum btree_id btree_id:8;
- u8 level:4;
+ u8 level:3;
bool cached:1;
bool insert_trigger_run:1;
bool overwrite_trigger_run:1;
bool key_cache_already_flushed:1;
+ bool key_cache_flushing:1;
/*
- * @old_k may be a key from the journal; @old_btree_u64s always refers
- * to the size of the key being overwritten in the btree:
+ * @old_k may be a key from the journal or the key cache;
+ * @old_btree_u64s always refers to the size of the key being
+ * overwritten in the btree:
*/
u8 old_btree_u64s;
btree_path_idx_t path;
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index 192c1e5e..5d9e0237 100644
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -325,47 +325,11 @@ err:
return ret;
}
-static noinline int flush_new_cached_update(struct btree_trans *trans,
- struct btree_insert_entry *i,
- enum btree_iter_update_trigger_flags flags,
- unsigned long ip)
-{
- struct bkey k;
- int ret;
-
- btree_path_idx_t path_idx =
- bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0,
- BTREE_ITER_intent, _THIS_IP_);
- ret = bch2_btree_path_traverse(trans, path_idx, 0);
- if (ret)
- goto out;
-
- struct btree_path *btree_path = trans->paths + path_idx;
-
- /*
- * The old key in the insert entry might actually refer to an existing
- * key in the btree that has been deleted from cache and not yet
- * flushed. Check for this and skip the flush so we don't run triggers
- * against a stale key.
- */
- bch2_btree_path_peek_slot_exact(btree_path, &k);
- if (!bkey_deleted(&k))
- goto out;
-
- i->key_cache_already_flushed = true;
- i->flags |= BTREE_TRIGGER_norun;
-
- btree_path_set_should_be_locked(trans, btree_path);
- ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip);
-out:
- bch2_path_put(trans, path_idx, true);
- return ret;
-}
-
-static int __must_check
-bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
- struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
- unsigned long ip)
+static inline struct btree_insert_entry *
+__btree_trans_update_by_path(struct btree_trans *trans,
+ btree_path_idx_t path_idx,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+ unsigned long ip)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i, n;
@@ -436,6 +400,58 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
__btree_path_get(trans, trans->paths + i->path, true);
trace_update_by_path(trans, path, i, overwrite);
+ return i;
+}
+
+static noinline int flush_new_cached_update(struct btree_trans *trans,
+ struct btree_insert_entry *i,
+ enum btree_iter_update_trigger_flags flags,
+ unsigned long ip)
+{
+ btree_path_idx_t path_idx =
+ bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0,
+ BTREE_ITER_intent, _THIS_IP_);
+ int ret = bch2_btree_path_traverse(trans, path_idx, 0);
+ if (ret)
+ goto out;
+
+ struct btree_path *btree_path = trans->paths + path_idx;
+
+ btree_path_set_should_be_locked(trans, btree_path);
+#if 0
+ /*
+ * The old key in the insert entry might actually refer to an existing
+ * key in the btree that has been deleted from cache and not yet
+ * flushed. Check for this and skip the flush so we don't run triggers
+ * against a stale key.
+ */
+ struct bkey k;
+ bch2_btree_path_peek_slot_exact(btree_path, &k);
+ if (!bkey_deleted(&k))
+ goto out;
+#endif
+ i->key_cache_already_flushed = true;
+ i->flags |= BTREE_TRIGGER_norun;
+
+ struct bkey old_k = i->old_k;
+ const struct bch_val *old_v = i->old_v;
+
+ i = __btree_trans_update_by_path(trans, path_idx, i->k, flags, _THIS_IP_);
+
+ i->old_k = old_k;
+ i->old_v = old_v;
+ i->key_cache_flushing = true;
+out:
+ bch2_path_put(trans, path_idx, true);
+ return ret;
+}
+
+static int __must_check
+bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+ unsigned long ip)
+{
+ struct btree_insert_entry *i = __btree_trans_update_by_path(trans, path_idx, k, flags, ip);
/*
* If a key is present in the key cache, it must also exist in the
@@ -444,10 +460,9 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
* the key cache - but the key has to exist in the btree for that to
* work:
*/
- if (path->cached && !i->old_btree_u64s)
- return flush_new_cached_update(trans, i, flags, ip);
-
- return 0;
+ return i->cached && (!i->old_btree_u64s || bkey_deleted(&k->k))
+ ? flush_new_cached_update(trans, i, flags, ip)
+ : 0;
}
static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
@@ -566,7 +581,7 @@ void *__bch2_trans_subbuf_alloc(struct btree_trans *trans,
if (buf->u64s)
memcpy(n,
btree_trans_subbuf_base(trans, buf),
- buf->size * sizeof(u64));
+ buf->u64s * sizeof(u64));
buf->base = (u64 *) n - (u64 *) trans->mem;
buf->size = new_size;
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index e4b6e7d5..2c6f9b44 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -184,8 +184,7 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
-int bch2_btree_write_buffer_insert_err(struct btree_trans *,
- enum btree_id, struct bkey_i *);
+int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *);
static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree,
@@ -196,7 +195,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr
EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
if (unlikely(!btree_type_uses_write_buffer(btree))) {
- int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
+ int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k);
dump_stack();
return ret;
}
@@ -272,6 +271,13 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
(_i) < (_trans)->updates + (_trans)->nr_updates; \
(_i)++)
+static inline bool bch2_trans_has_updates(struct btree_trans *trans)
+{
+ return trans->nr_updates ||
+ trans->journal_entries.u64s ||
+ trans->accounting.u64s;
+}
+
static inline void bch2_trans_reset_updates(struct btree_trans *trans)
{
trans_for_each_update(trans, i)
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index d9ac09fa..8e3d3db2 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -217,7 +217,7 @@ static void __btree_node_free(struct btree_trans *trans, struct btree *b)
{
struct bch_fs *c = trans->c;
- trace_and_count(c, btree_node_free, trans, b);
+ trace_btree_node(c, b, btree_node_free);
BUG_ON(btree_node_write_blocked(b));
BUG_ON(btree_node_dirty(b));
@@ -406,7 +406,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as,
ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
BUG_ON(ret);
- trace_and_count(c, btree_node_alloc, trans, b);
+ trace_btree_node(c, b, btree_node_alloc);
bch2_increment_clock(c, btree_sectors(c), WRITE);
return b;
}
@@ -1278,10 +1278,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
do {
ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl);
-
+ if (!bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ break;
bch2_trans_unlock(trans);
bch2_wait_on_allocator(c, &cl);
- } while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
+ } while (1);
}
if (ret) {
@@ -1330,7 +1331,7 @@ static int bch2_btree_set_root(struct btree_update *as,
{
struct bch_fs *c = as->c;
- trace_and_count(c, btree_node_set_root, trans, b);
+ trace_btree_node(c, b, btree_node_set_root);
struct btree *old = btree_node_root(c, b);
@@ -1640,7 +1641,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
struct btree *n[2];
- trace_and_count(c, btree_node_split, trans, b);
+ trace_btree_node(c, b, btree_node_split);
n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);
@@ -1702,7 +1703,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
goto err;
}
} else {
- trace_and_count(c, btree_node_compact, trans, b);
+ trace_btree_node(c, b, btree_node_compact);
n1 = bch2_btree_node_alloc_replacement(as, trans, b);
@@ -2118,7 +2119,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
as->node_start = prev->data->min_key;
as->node_end = next->data->max_key;
- trace_and_count(c, btree_node_merge, trans, b);
+ trace_btree_node(c, b, btree_node_merge);
n = bch2_btree_node_alloc(as, trans, b->c.level);
@@ -2250,8 +2251,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, trans->paths + new_path, n);
- trace_and_count(c, btree_node_rewrite, trans, b);
-
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
@@ -2262,6 +2261,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
if (ret)
goto err;
+ trace_btree_node(c, b, btree_node_rewrite);
+
bch2_btree_interior_update_will_free_node(as, b);
bch2_btree_update_get_open_buckets(as, n);
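The bch2_btree_update_start() hunk above reshapes the reserve loop so it exits on success or any hard error, and only unlocks and waits on the allocator when the reservation is actually blocked. A toy sketch of that loop shape, using hypothetical helpers and error codes:

#include <stdio.h>

/* hypothetical error codes */
enum { ERR_NONE = 0, ERR_BLOCKED = -1 };

static int attempts;

static int reserve_get(void)
{
	/* pretend the allocator is busy for the first two attempts */
	return ++attempts < 3 ? ERR_BLOCKED : ERR_NONE;
}

static void trans_unlock(void)      { puts("unlock"); }
static void wait_on_allocator(void) { puts("wait"); }

int main(void)
{
	int ret;

	do {
		ret = reserve_get();
		if (ret != ERR_BLOCKED)
			break;		/* success or hard error: done */
		trans_unlock();
		wait_on_allocator();
	} while (1);

	printf("ret=%d after %d attempts\n", ret, attempts);
	return 0;
}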
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 21b5c03d..4b095235 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -267,10 +267,9 @@ out:
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
}
-int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
+int bch2_btree_write_buffer_insert_err(struct bch_fs *c,
enum btree_id btree, struct bkey_i *k)
{
- struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
@@ -332,7 +331,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
- ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
+ ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k);
goto err;
}
diff --git a/libbcachefs/btree_write_buffer.h b/libbcachefs/btree_write_buffer.h
index 05f56fd1..c351d21a 100644
--- a/libbcachefs/btree_write_buffer.h
+++ b/libbcachefs/btree_write_buffer.h
@@ -89,6 +89,12 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{
+ if (unlikely(!btree_type_uses_write_buffer(btree))) {
+ int ret = bch2_btree_write_buffer_insert_err(c, btree, k);
+ dump_stack();
+ return ret;
+ }
+
EBUG_ON(!dst->seq);
return k->k.type == KEY_TYPE_accounting
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index e848e210..3968f3be 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -783,6 +783,9 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
darray_for_each(m->op.devs_have, i)
__clear_bit(*i, devs.d);
+ CLASS(printbuf, buf)();
+ buf.atomic++;
+
guard(rcu)();
unsigned nr_replicas = 0, i;
@@ -794,7 +797,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
struct bch_dev_usage usage;
bch2_dev_usage_read_fast(ca, &usage);
- if (!dev_buckets_free(ca, usage, m->op.watermark))
+ u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark);
+
+ prt_printf(&buf, "%s=%llu ", ca->name, nr_free);
+
+ if (!nr_free)
continue;
nr_replicas += ca->mi.durability;
@@ -802,8 +809,10 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
break;
}
- if (!nr_replicas)
+ if (!nr_replicas) {
+ trace_data_update_done_no_rw_devs(c, buf.buf);
return bch_err_throw(c, data_update_done_no_rw_devs);
+ }
if (nr_replicas < m->op.nr_replicas)
return bch_err_throw(c, insufficient_devices);
return 0;
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 308de4b2..ccbb0127 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -18,9 +18,12 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
-#if IS_ENABLED(CONFIG_UNICODE)
+ int ret = bch2_fs_casefold_enabled(trans->c);
+ if (ret)
+ return ret;
+
unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
- int ret = PTR_ERR_OR_ZERO(buf);
+ ret = PTR_ERR_OR_ZERO(buf);
if (ret)
return ret;
@@ -30,9 +33,6 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
*out_cf = (struct qstr) QSTR_INIT(buf, ret);
return 0;
-#else
- return bch_err_throw(trans->c, no_casefolding_without_utf8);
-#endif
}
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
@@ -252,7 +252,10 @@ int bch2_dirent_init_name(struct bch_fs *c,
offsetof(struct bch_dirent, d_name) -
name->len);
} else {
-#if IS_ENABLED(CONFIG_UNICODE)
+ int ret = bch2_fs_casefold_enabled(c);
+ if (ret)
+ return ret;
+
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len];
@@ -278,9 +281,6 @@ int bch2_dirent_init_name(struct bch_fs *c,
dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len);
EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len);
-#else
- return bch_err_throw(c, no_casefolding_without_utf8);
-#endif
}
unsigned u64s = dirent_val_u64s(name->len, cf_len);
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 543dbba9..687c3ba9 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -1683,7 +1683,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
return ERR_PTR(ret);
if (test_bit(BCH_FS_going_ro, &c->flags)) {
- h = ERR_PTR(-BCH_ERR_erofs_no_writes);
+ h = ERR_PTR(bch_err_throw(c, erofs_no_writes));
goto err;
}
@@ -1702,7 +1702,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
if (!h) {
- h = ERR_PTR(-BCH_ERR_ENOMEM_stripe_head_alloc);
+ h = ERR_PTR(bch_err_throw(c, ENOMEM_stripe_head_alloc));
goto err;
}
found:
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 3118449d..d27b94a6 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -218,6 +218,7 @@
x(EINVAL, option_negative) \
x(EOPNOTSUPP, may_not_use_incompat_feature) \
x(EOPNOTSUPP, no_casefolding_without_utf8) \
+ x(EOPNOTSUPP, casefolding_disabled) \
x(EOPNOTSUPP, casefold_opt_is_dir_only) \
x(EOPNOTSUPP, unsupported_fsx_flag) \
x(EOPNOTSUPP, unsupported_fa_flag) \
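The new casefolding_disabled entry above slots into errcode.h's X-macro table, where each x(class, name) line expands into an enum value and a string name (the class is used elsewhere to map the code back to a parent errno). A minimal sketch of that X-macro technique with a hypothetical two-entry table:

#include <stdio.h>

/* hypothetical table in the style of the errcode.h x() list */
#define MY_ERRCODES()					\
	x(EOPNOTSUPP, no_casefolding_without_utf8)	\
	x(EOPNOTSUPP, casefolding_disabled)

enum my_errcode {
	MY_ERR_START = 2048,
#define x(class, name)	MY_ERR_##name,
	MY_ERRCODES()
#undef x
	MY_ERR_MAX
};

static const char * const my_err_strs[] = {
#define x(class, name)	[MY_ERR_##name - MY_ERR_START - 1] = #name,
	MY_ERRCODES()
#undef x
};

int main(void)
{
	printf("%s\n", my_err_strs[MY_ERR_casefolding_disabled - MY_ERR_START - 1]);
	return 0;
}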
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index b2a6c041..a9a9fe19 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -401,7 +401,8 @@ int bch2_fsck_err_opt(struct bch_fs *c,
if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
flags |= fsck_flags_extra[err];
- if (test_bit(BCH_FS_in_fsck, &c->flags)) {
+ if (test_bit(BCH_FS_in_fsck, &c->flags) ||
+ test_bit(BCH_FS_in_recovery, &c->flags)) {
if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
return bch_err_throw(c, fsck_repair_unimplemented);
@@ -472,10 +473,13 @@ int __bch2_fsck_err(struct bch_fs *c,
!trans &&
bch2_current_has_btree_trans(c));
- if (test_bit(err, c->sb.errors_silent))
- return flags & FSCK_CAN_FIX
+ if ((flags & FSCK_ERR_SILENT) ||
+ test_bit(err, c->sb.errors_silent)) {
+ ret = flags & FSCK_CAN_FIX
? bch_err_throw(c, fsck_fix)
: bch_err_throw(c, fsck_ignore);
+ goto err;
+ }
printbuf_indent_add_nextline(out, 2);
@@ -620,14 +624,14 @@ print:
if (s)
s->ret = ret;
-
+err_unlock:
+ mutex_unlock(&c->fsck_error_msgs_lock);
+err:
if (trans &&
!(flags & FSCK_ERR_NO_LOG) &&
ret == -BCH_ERR_fsck_fix)
ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret;
-err_unlock:
- mutex_unlock(&c->fsck_error_msgs_lock);
-err:
+
/*
* We don't yet track whether the filesystem currently has errors, for
* log_fsck_err()s: that would require us to track for every error type
diff --git a/libbcachefs/fs-io-buffered.c b/libbcachefs/fs-io-buffered.c
index 66bacdd4..dad48d44 100644
--- a/libbcachefs/fs-io-buffered.c
+++ b/libbcachefs/fs-io-buffered.c
@@ -145,7 +145,7 @@ static int readpage_bio_extend(struct btree_trans *trans,
BUG_ON(folio_sector(folio) != bio_end_sector(bio));
- BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0));
+ bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
}
return bch2_trans_relock(trans);
@@ -311,7 +311,7 @@ void bch2_readahead(struct readahead_control *ractl)
readpage_iter_advance(&readpages_iter);
rbio->bio.bi_iter.bi_sector = folio_sector(folio);
- BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
+ bio_add_folio_nofail(&rbio->bio, folio, folio_size(folio), 0);
bchfs_read(trans, rbio, inode_inum(inode),
&readpages_iter);
@@ -354,7 +354,7 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
rbio->bio.bi_private = &done;
rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
rbio->bio.bi_iter.bi_sector = folio_sector(folio);
- BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
+ bio_add_folio_nofail(&rbio->bio, folio, folio_size(folio), 0);
blk_start_plug(&plug);
bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0));
@@ -639,8 +639,8 @@ do_io:
atomic_inc(&s->write_count);
BUG_ON(inode != w->io->inode);
- BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio,
- sectors << 9, offset << 9));
+ bio_add_folio_nofail(&w->io->op.wbio.bio, folio,
+ sectors << 9, offset << 9);
w->io->op.res.sectors += reserved_sectors;
w->io->op.i_sectors_delta -= dirty_sectors;
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index df42d58d..3b0783f1 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -722,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
if (IS_ERR(inode))
inode = NULL;
-#if IS_ENABLED(CONFIG_UNICODE)
if (!inode && IS_CASEFOLDED(vdir)) {
/*
* Do not cache a negative dentry in casefolded directories
@@ -737,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
*/
return NULL;
}
-#endif
return d_splice_alias(&inode->v, dentry);
}
@@ -1694,11 +1692,15 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap,
s.mask = map_defined(bch_flags_to_xflags);
s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags);
- if (fa->fsx_xflags)
- return bch_err_throw(c, unsupported_fsx_flag);
+ if (fa->fsx_xflags) {
+ ret = bch_err_throw(c, unsupported_fsx_flag);
+ goto err;
+ }
- if (fa->fsx_projid >= U32_MAX)
- return bch_err_throw(c, projid_too_big);
+ if (fa->fsx_projid >= U32_MAX) {
+ ret = bch_err_throw(c, projid_too_big);
+ goto err;
+ }
/*
* inode fields accessible via the xattr interface are stored with a +1
@@ -1720,8 +1722,10 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap,
fa->flags &= ~FS_CASEFOLD_FL;
s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags);
- if (fa->flags)
- return bch_err_throw(c, unsupported_fa_flag);
+ if (fa->flags) {
+ ret = bch_err_throw(c, unsupported_fa_flag);
+ goto err;
+ }
}
mutex_lock(&inode->ei_update_lock);
@@ -1732,7 +1736,8 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap,
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
- return ret;
+err:
+ return bch2_err_class(ret);
}
static const struct file_operations bch_file_operations = {
@@ -2565,9 +2570,10 @@ got_sb:
sb->s_shrink->seeks = 0;
#if IS_ENABLED(CONFIG_UNICODE)
- sb->s_encoding = c->cf_encoding;
-#endif
+ if (!bch2_fs_casefold_enabled(c))
+ sb->s_encoding = c->cf_encoding;
generic_set_sb_d_ops(sb);
+#endif
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 4ceb28a6..1ceca63c 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -12,6 +12,7 @@
#include "fs.h"
#include "fsck.h"
#include "inode.h"
+#include "io_misc.h"
#include "keylist.h"
#include "namei.h"
#include "recovery_passes.h"
@@ -1500,6 +1501,10 @@ static int check_key_has_inode(struct btree_trans *trans,
SPOS(k.k->p.inode, 0, k.k->p.snapshot),
POS(k.k->p.inode, U64_MAX),
0, k2, ret) {
+ if (k.k->type == KEY_TYPE_error ||
+ k.k->type == KEY_TYPE_hash_whiteout)
+ continue;
+
nr_keys++;
if (nr_keys <= 10) {
bch2_bkey_val_to_text(&buf, c, k2);
@@ -1512,9 +1517,11 @@ static int check_key_has_inode(struct btree_trans *trans,
if (ret)
goto err;
+ unsigned reconstruct_limit = iter->btree_id == BTREE_ID_extents ? 3 : 0;
+
if (nr_keys > 100)
prt_printf(&buf, "found > %u keys for this missing inode\n", nr_keys);
- else if (nr_keys > 10)
+ else if (nr_keys > reconstruct_limit)
prt_printf(&buf, "found %u keys for this missing inode\n", nr_keys);
if (!have_inode) {
@@ -1572,6 +1579,44 @@ reconstruct:
goto out;
}
+static int maybe_reconstruct_inum_btree(struct btree_trans *trans,
+ u64 inum, u32 snapshot,
+ enum btree_id btree)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key_max_norestart(trans, iter, btree,
+ SPOS(inum, 0, snapshot),
+ POS(inum, U64_MAX),
+ 0, k, ret) {
+ ret = 1;
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret <= 0)
+ return ret;
+
+ if (fsck_err(trans, missing_inode_with_contents,
+ "inode %llu:%u type %s missing, but contents found: reconstruct?",
+ inum, snapshot,
+ btree == BTREE_ID_extents ? "reg" : "dir"))
+ return reconstruct_inode(trans, btree, snapshot, inum) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ bch_err_throw(trans->c, transaction_restart_commit);
+fsck_err:
+ return ret;
+}
+
+static int maybe_reconstruct_inum(struct btree_trans *trans,
+ u64 inum, u32 snapshot)
+{
+ return maybe_reconstruct_inum_btree(trans, inum, snapshot, BTREE_ID_extents) ?:
+ maybe_reconstruct_inum_btree(trans, inum, snapshot, BTREE_ID_dirents);
+}
+
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
@@ -1919,33 +1964,11 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout));
- ret = PTR_ERR_OR_ZERO(whiteout);
- if (ret)
- goto err;
-
- bkey_init(&whiteout->k);
- whiteout->k.p = SPOS(k.k->p.inode,
- last_block,
- i->inode.bi_snapshot);
- bch2_key_resize(&whiteout->k,
- min(KEY_SIZE_MAX & (~0 << c->block_bits),
- U64_MAX - whiteout->k.p.offset));
-
-
- /*
- * Need a normal (not BTREE_ITER_all_snapshots)
- * iterator, if we're deleting in a different
- * snapshot and need to emit a whiteout
- */
- struct btree_iter iter2;
- bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents,
- bkey_start_pos(&whiteout->k),
- BTREE_ITER_intent);
- ret = bch2_btree_iter_traverse(trans, &iter2) ?:
- bch2_trans_update(trans, &iter2, whiteout,
- BTREE_UPDATE_internal_snapshot_node);
- bch2_trans_iter_exit(trans, &iter2);
+ ret = bch2_fpunch_snapshot(trans,
+ SPOS(i->inode.bi_inum,
+ last_block,
+ i->inode.bi_snapshot),
+ POS(i->inode.bi_inum, U64_MAX));
if (ret)
goto err;
@@ -2302,9 +2325,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
-#if IS_ENABLED(CONFIG_UNICODE)
hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL;
-#endif
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info,
iter, k, need_second_pass);
@@ -2368,6 +2389,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
goto err;
+ if (!target->inodes.nr) {
+ ret = maybe_reconstruct_inum(trans, le64_to_cpu(d.v->d_inum),
+ d.k->p.snapshot);
+ if (ret)
+ return ret;
+ }
+
if (fsck_err_on(!target->inodes.nr,
trans, dirent_to_missing_inode,
"dirent points to missing inode:\n%s",
@@ -2811,7 +2839,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
ret = remove_backpointer(trans, &inode);
bch_err_msg(c, ret, "removing dirent");
if (ret)
- break;
+ goto out;
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 95f3c0d4..307fb0c9 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -38,7 +38,7 @@ static const char * const bch2_inode_flag_strs[] = {
#undef x
static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos);
-static int may_delete_deleted_inum(struct btree_trans *, subvol_inum);
+static int may_delete_deleted_inum(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *);
static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
@@ -1018,6 +1018,7 @@ int bch2_inode_create(struct btree_trans *trans,
u64 start = le64_to_cpu(cursor->v.idx);
u64 pos = start;
+ u64 gen = 0;
bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
BTREE_ITER_all_snapshots|
@@ -1030,6 +1031,12 @@ again:
if (pos < iter->pos.offset)
goto found_slot;
+ if (bch2_snapshot_is_ancestor(trans->c, snapshot, k.k->p.snapshot) &&
+ k.k->type == KEY_TYPE_inode_generation) {
+ gen = le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
+ goto found_slot;
+ }
+
/*
* We don't need to iterate over keys in every snapshot once
* we've found just one:
@@ -1064,7 +1071,7 @@ found_slot:
}
inode_u->bi_inum = k.k->p.offset;
- inode_u->bi_generation = le64_to_cpu(cursor->v.gen);
+ inode_u->bi_generation = max(gen, le64_to_cpu(cursor->v.gen));
cursor->v.idx = cpu_to_le64(k.k->p.offset + 1);
return 0;
}
@@ -1128,10 +1135,11 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = {};
struct bkey_s_c k;
+ struct bch_inode_unpacked inode;
u32 snapshot;
int ret;
- ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum));
+ ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum, &inode));
if (ret)
goto err2;
@@ -1143,9 +1151,10 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
* XXX: the dirent code ideally would delete whiteouts when they're no
* longer needed
*/
- ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
- bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
- bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
+ ret = (!S_ISDIR(inode.bi_mode)
+ ? bch2_inode_delete_keys(trans, inum, BTREE_ID_extents)
+ : bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents)) ?:
+ bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs);
if (ret)
goto err2;
retry:
@@ -1265,8 +1274,12 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
{
struct bch_fs *c = trans->c;
-#if IS_ENABLED(CONFIG_UNICODE)
- int ret = 0;
+ int ret = bch2_fs_casefold_enabled(c);
+ if (ret) {
+ bch_err_ratelimited(c, "Cannot enable casefolding: %s", bch2_err_str(ret));
+ return ret;
+ }
+
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
return bch_err_throw(c, casefold_opt_is_dir_only);
@@ -1289,10 +1302,6 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
bi->bi_fields_set |= BIT(Inode_opt_casefold);
return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi);
-#else
- bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
- return bch_err_throw(c, no_casefolding_without_utf8);
-#endif
}
static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
@@ -1317,7 +1326,7 @@ static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum
SPOS(inum, 0, snapshot),
SPOS(inum, U64_MAX, snapshot),
0, NULL);
- } while (ret == -BCH_ERR_transaction_restart_nested);
+ } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
if (ret)
goto err;
retry:
@@ -1355,7 +1364,7 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- return ret ?: -BCH_ERR_transaction_restart_nested;
+ return ret ?: bch_err_throw(c, transaction_restart_nested);
}
/*
@@ -1398,12 +1407,12 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
}
static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
+ struct bch_inode_unpacked *inode,
bool from_deleted_inodes)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter;
struct bkey_s_c k;
- struct bch_inode_unpacked inode;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1421,11 +1430,11 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
if (ret)
goto out;
- ret = bch2_inode_unpack(k, &inode);
+ ret = bch2_inode_unpack(k, inode);
if (ret)
goto out;
- if (S_ISDIR(inode.bi_mode)) {
+ if (S_ISDIR(inode->bi_mode)) {
ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot);
if (fsck_err_on(from_deleted_inodes &&
bch2_err_matches(ret, ENOTEMPTY),
@@ -1437,7 +1446,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
goto out;
}
- ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : bch_err_throw(c, inode_not_unlinked);
+ ret = inode->bi_flags & BCH_INODE_unlinked ? 0 : bch_err_throw(c, inode_not_unlinked);
if (fsck_err_on(from_deleted_inodes && ret,
trans, deleted_inode_not_unlinked,
"non-deleted inode %llu:%u in deleted_inodes btree",
@@ -1446,7 +1455,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
if (ret)
goto out;
- ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot)
+ ret = !(inode->bi_flags & BCH_INODE_has_child_snapshot)
? 0 : bch_err_throw(c, inode_has_child_snapshot);
if (fsck_err_on(from_deleted_inodes && ret,
@@ -1465,10 +1474,10 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
if (fsck_err(trans, inode_has_child_snapshots_wrong,
"inode has_child_snapshots flag wrong (should be set)\n%s",
(printbuf_reset(&buf),
- bch2_inode_unpacked_to_text(&buf, &inode),
+ bch2_inode_unpacked_to_text(&buf, inode),
buf.buf))) {
- inode.bi_flags |= BCH_INODE_has_child_snapshot;
- ret = __bch2_fsck_write_inode(trans, &inode);
+ inode->bi_flags |= BCH_INODE_has_child_snapshot;
+ ret = __bch2_fsck_write_inode(trans, inode);
if (ret)
goto out;
}
@@ -1504,12 +1513,13 @@ delete:
goto out;
}
-static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum)
+static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *inode)
{
u32 snapshot;
return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
- may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false);
+ may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), inode, false);
}
int bch2_delete_dead_inodes(struct bch_fs *c)
@@ -1535,7 +1545,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
- ret = may_delete_deleted_inode(trans, k.k->p, true);
+ struct bch_inode_unpacked inode;
+ ret = may_delete_deleted_inode(trans, k.k->p, &inode, true);
if (ret > 0) {
bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u",
k.k->p.offset, k.k->p.snapshot);
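With the hunks above, bch2_inode_create() will also take a slot whose only visible key is a KEY_TYPE_inode_generation left behind by an earlier delete, and it seeds bi_generation from whichever is larger, that key or the allocator cursor, so the generation for an inode number never moves backwards. A minimal illustration with made-up values (the numbers below are hypothetical, not from the patch):

    /* hypothetical values for the slot picked above: the leftover
     * inode_generation key says gen 7, the allocator cursor says gen 3 */
    u64 gen        = 7;                              /* from KEY_TYPE_inode_generation */
    u64 cursor_gen = 3;                              /* le64_to_cpu(cursor->v.gen)     */

    inode_u->bi_generation = max(gen, cursor_gen);   /* 7: never goes backwards        */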
diff --git a/libbcachefs/io_misc.c b/libbcachefs/io_misc.c
index bf72b1d2..07023667 100644
--- a/libbcachefs/io_misc.c
+++ b/libbcachefs/io_misc.c
@@ -135,6 +135,33 @@ err_noprint:
return ret;
}
+/* For fsck */
+int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bpos end)
+{
+ u32 restart_count = trans->restart_count;
+ struct bch_fs *c = trans->c;
+ struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0);
+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
+ struct bkey_i delete;
+
+ int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
+ start, end, 0, k,
+ &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ bkey_init(&delete.k);
+ delete.k.p = iter.pos;
+
+ /* create the biggest key we can */
+ bch2_key_resize(&delete.k, max_sectors);
+ bch2_cut_back(end, &delete);
+
+ bch2_extent_trim_atomic(trans, &iter, &delete) ?:
+ bch2_trans_update(trans, &iter, &delete, 0);
+ }));
+
+ bch2_disk_reservation_put(c, &disk_res);
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
/*
* Returns -BCH_ERR_transaction_restart if we had to drop locks:
*/
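bch2_fpunch_snapshot() deletes extents within a single snapshot, one atomically trimmed key per transaction commit, without the subvolume lookup and i_sectors delta tracking that bch2_fpunch_at() does (its signature takes a subvol_inum and an s64 *delta), hence the "For fsck" tag. A hedged caller sketch, assuming a hypothetical repair that drops everything from a given sector to the end of an inode's extents in one snapshot (inum, start_sector and snapshot are placeholders):

    /* illustrative only: punch [start_sector, end of keyspace) for inum in one snapshot */
    int ret = bch2_fpunch_snapshot(trans,
                                   SPOS(inum, start_sector, snapshot),
                                   SPOS(inum, U64_MAX, snapshot));
    if (ret)
        return ret;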
diff --git a/libbcachefs/io_misc.h b/libbcachefs/io_misc.h
index 9cb44a7c..b93e4d4b 100644
--- a/libbcachefs/io_misc.h
+++ b/libbcachefs/io_misc.h
@@ -5,6 +5,8 @@
int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
u64, struct bch_io_opts, s64 *,
struct write_point_specifier);
+
+int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
subvol_inum, u64, s64 *);
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index cd184b21..210b6adc 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -37,12 +37,6 @@ module_param_named(read_corrupt_ratio, bch2_read_corrupt_ratio, uint, 0644);
MODULE_PARM_DESC(read_corrupt_ratio, "");
#endif
-static bool bch2_poison_extents_on_checksum_error;
-module_param_named(poison_extents_on_checksum_error,
- bch2_poison_extents_on_checksum_error, bool, 0644);
-MODULE_PARM_DESC(poison_extents_on_checksum_error,
- "Extents with checksum errors are marked as poisoned - unsafe without read fua support");
-
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static bool bch2_target_congested(struct bch_fs *c, u16 target)
@@ -484,9 +478,6 @@ static void get_rbio_extent(struct btree_trans *trans,
static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
enum btree_id btree, struct bkey_s_c read_k)
{
- if (!bch2_poison_extents_on_checksum_error)
- return 0;
-
struct bch_fs *c = trans->c;
struct data_update *u = rbio_data_update(rbio);
@@ -1232,6 +1223,10 @@ retry_pick:
async_object_list_add(c, rbio, rbio, &rbio->list_idx);
+ /* XXX: also nvme read recovery level */
+ if (unlikely(failed && bch2_dev_io_failures(failed, pick.ptr.dev)))
+ rbio->bio.bi_opf |= REQ_FUA;
+
if (rbio->bounce)
trace_and_count(c, io_read_bounce, &rbio->bio);
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index df71af00..ce534061 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1283,7 +1283,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca
ret = 0; /* wait and retry */
bch2_disk_reservation_put(c, &disk_res);
- closure_sync(&cl);
+ bch2_wait_on_allocator(c, &cl);
}
return ret;
@@ -1376,7 +1376,6 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
return bch_err_throw(c, erofs_filesystem_full);
}
- unsigned nr;
int ret;
if (dynamic_fault("bcachefs:add:journal_alloc")) {
@@ -1385,16 +1384,19 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
}
/* 1/128th of the device by default: */
- nr = ca->mi.nbuckets >> 7;
+ unsigned nr = ca->mi.nbuckets >> 7;
/*
- * clamp journal size to 8192 buckets or 8GB (in sectors), whichever
- * is smaller:
+ * clamp journal size to 8GB, or 32GB with large_journal option:
*/
+ unsigned max_sectors = 1 << 24;
+
+ if (c->opts.large_journal)
+ max_sectors *= 4;
+
nr = clamp_t(unsigned, nr,
BCH_JOURNAL_BUCKETS_MIN,
- min(1 << 13,
- (1 << 24) / ca->mi.bucket_size));
+ max_sectors / ca->mi.bucket_size);
ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs);
err:
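The replaced clamp capped the journal at whichever was smaller of 8192 buckets or 8 GiB worth of buckets; the new code expresses the cap purely in sectors and lets large_journal raise it fourfold. A worked example with hypothetical geometry (1 TiB device, 1 MiB buckets, i.e. 2048 sectors per bucket):

    u64      nbuckets      = 1048576;            /* hypothetical: 1 TiB / 1 MiB buckets      */
    unsigned bucket_size   = 2048;               /* sectors per bucket (1 MiB)               */
    unsigned nr            = nbuckets >> 7;      /* 1/128th of the device: 8192 buckets      */

    unsigned max_sectors   = 1 << 24;            /* 8 GiB expressed in 512-byte sectors      */
    bool     large_journal = false;              /* stands in for c->opts.large_journal      */
    if (large_journal)
        max_sectors *= 4;                        /* 32 GiB                                   */

    nr = clamp_t(unsigned, nr, BCH_JOURNAL_BUCKETS_MIN,
                 max_sectors / bucket_size);     /* cap: 8192 (or 32768) buckets -> nr = 8192 */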
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index dd3f3434..3f06c4b2 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -1272,6 +1272,28 @@ static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_r
printbuf_exit(&buf);
}
+struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end)
+{
+ BUG_ON(start > end);
+
+ if (start == end)
+ return (struct u64_range) {};
+
+ start = bch2_journal_seq_next_nonblacklisted(c, start);
+ if (start >= end)
+ return (struct u64_range) {};
+
+ struct u64_range missing = {
+ .start = start,
+ .end = min(end, bch2_journal_seq_next_blacklisted(c, start)),
+ };
+
+ if (missing.start == missing.end)
+ return (struct u64_range) {};
+
+ return missing;
+}
+
noinline_for_stack
static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq)
{
@@ -1280,6 +1302,7 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e
struct genradix_iter radix_iter;
struct journal_replay *i, **_i, *prev = NULL;
+ /* Sequence number we expect to find next, to check for missing entries */
u64 seq = start_seq;
genradix_for_each(&c->journal_entries, radix_iter, _i) {
@@ -1290,43 +1313,31 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e
BUG_ON(seq > le64_to_cpu(i->j.seq));
- while (seq < le64_to_cpu(i->j.seq)) {
- while (seq < le64_to_cpu(i->j.seq) &&
- bch2_journal_seq_is_blacklisted(c, seq, false))
- seq++;
-
- if (seq == le64_to_cpu(i->j.seq))
- break;
-
- u64 missing_start = seq;
-
- while (seq < le64_to_cpu(i->j.seq) &&
- !bch2_journal_seq_is_blacklisted(c, seq, false))
- seq++;
-
- u64 missing_end = seq - 1;
+ struct u64_range missing;
+ while ((missing = bch2_journal_entry_missing_range(c, seq, le64_to_cpu(i->j.seq))).start) {
printbuf_reset(&buf);
prt_printf(&buf, "journal entries %llu-%llu missing! (replaying %llu-%llu)",
- missing_start, missing_end,
+ missing.start, missing.end - 1,
start_seq, end_seq);
- prt_printf(&buf, "\nprev at ");
if (prev) {
+ prt_printf(&buf, "\n%llu at ", le64_to_cpu(prev->j.seq));
bch2_journal_ptrs_to_text(&buf, c, prev);
prt_printf(&buf, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
- } else
- prt_printf(&buf, "(none)");
+ }
- prt_printf(&buf, "\nnext at ");
+ prt_printf(&buf, "\n%llu at ", le64_to_cpu(i->j.seq));
bch2_journal_ptrs_to_text(&buf, c, i);
prt_printf(&buf, ", continue?");
fsck_err(c, journal_entries_missing, "%s", buf.buf);
+
+ seq = missing.end;
}
prev = i;
- seq++;
+ seq = le64_to_cpu(i->j.seq) + 1;
}
fsck_err:
printbuf_exit(&buf);
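The new helper gives the old inline loop a clean contract: for a half-open range [start, end) of journal sequence numbers it returns the first contiguous run of non-blacklisted seqs in that range, with an exclusive .end, or a zeroed struct if the range is empty or entirely blacklisted; the caller passes the seq it expected next and the seq it actually found. Behaviour under an assumed state (entries found on disk at seqs 5 and 9, blacklist covering [6, 8); all numbers hypothetical):

    /* assumed on-disk state: entries at seqs 5 and 9, blacklist covers [6, 8) */
    struct u64_range m;

    m = bch2_journal_entry_missing_range(c, 6, 9);   /* { .start = 8, .end = 9 }: only seq 8  */
                                                     /* is truly missing; 6-7 are blacklisted */
    m = bch2_journal_entry_missing_range(c, 9, 9);   /* {}: empty range, nothing missing      */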
diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h
index 6fa82c40..f53c5c81 100644
--- a/libbcachefs/journal_io.h
+++ b/libbcachefs/journal_io.h
@@ -71,6 +71,13 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct journal_replay *);
+struct u64_range {
+ u64 start;
+ u64 end;
+};
+
+struct u64_range bch2_journal_entry_missing_range(struct bch_fs *, u64, u64);
+
int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
CLOSURE_CALLBACK(bch2_journal_write);
diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c
index af4fe416..6361809b 100644
--- a/libbcachefs/journal_seq_blacklist.c
+++ b/libbcachefs/journal_seq_blacklist.c
@@ -103,6 +103,52 @@ static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
return cmp_int(l->start, r->start);
}
+static int journal_seq_blacklist_table_end_cmp(const void *_l, const void *_r)
+{
+ const struct journal_seq_blacklist_table_entry *l = _l;
+ const struct journal_seq_blacklist_table_entry *r = _r;
+
+ return cmp_int(l->end, r->end);
+}
+
+u64 bch2_journal_seq_next_blacklisted(struct bch_fs *c, u64 seq)
+{
+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+
+ if (!t)
+ return U64_MAX;
+
+ struct journal_seq_blacklist_table_entry search = { .end = seq };
+ int idx = eytzinger0_find_gt(t->entries, t->nr,
+ sizeof(t->entries[0]),
+ journal_seq_blacklist_table_end_cmp,
+ &search);
+ if (idx < 0)
+ return U64_MAX;
+
+ return max(seq, t->entries[idx].start);
+}
+
+u64 bch2_journal_seq_next_nonblacklisted(struct bch_fs *c, u64 seq)
+{
+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+
+ if (!t)
+ return seq;
+
+ while (true) {
+ struct journal_seq_blacklist_table_entry search = { .start = seq };
+ int idx = eytzinger0_find_le(t->entries, t->nr,
+ sizeof(t->entries[0]),
+ journal_seq_blacklist_table_cmp,
+ &search);
+ if (idx < 0 || t->entries[idx].end <= seq)
+ return seq;
+
+ seq = t->entries[idx].end;
+ }
+}
+
bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
bool dirty)
{
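Both helpers search the existing eytzinger-ordered blacklist table: next_blacklisted() returns the first blacklisted seq at or after its argument (U64_MAX if there is none), and next_nonblacklisted() steps its argument forward past any blacklist ranges that contain it. Assumed behaviour for a hypothetical table holding the ranges [10, 20) and [30, 40) (start inclusive, end exclusive):

    /* assumed table contents: [10, 20) and [30, 40) */
    u64 s;

    s = bch2_journal_seq_next_blacklisted(c, 5);       /* 10: start of first range    */
    s = bch2_journal_seq_next_blacklisted(c, 12);      /* 12: already blacklisted     */
    s = bch2_journal_seq_next_blacklisted(c, 45);      /* U64_MAX: nothing after 45   */

    s = bch2_journal_seq_next_nonblacklisted(c, 12);   /* 20: steps past [10, 20)     */
    s = bch2_journal_seq_next_nonblacklisted(c, 25);   /* 25: not blacklisted, as-is  */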
diff --git a/libbcachefs/journal_seq_blacklist.h b/libbcachefs/journal_seq_blacklist.h
index f06942cc..389b789b 100644
--- a/libbcachefs/journal_seq_blacklist.h
+++ b/libbcachefs/journal_seq_blacklist.h
@@ -11,6 +11,9 @@ blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
: 0;
}
+u64 bch2_journal_seq_next_blacklisted(struct bch_fs *, u64);
+u64 bch2_journal_seq_next_nonblacklisted(struct bch_fs *, u64);
+
bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
u64 bch2_journal_last_blacklisted_seq(struct bch_fs *);
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c
index c3f87c59..3e2b41ba 100644
--- a/libbcachefs/namei.c
+++ b/libbcachefs/namei.c
@@ -1027,7 +1027,7 @@ fsck_err:
if (repairing_parents) {
return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
+ bch_err_throw(trans->c, transaction_restart_nested);
}
return 0;
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index b0a76bd6..4a7a6058 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -234,6 +234,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_CASEFOLD, false, \
NULL, "Dirent lookups are casefolded") \
+ x(casefold_disabled, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Disable casefolding filesystem wide") \
x(inodes_32bit, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
@@ -338,6 +343,12 @@ enum fsck_err_opts {
OPT_UINT(0, U32_MAX), \
BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \
NULL, "Delay in milliseconds before automatic journal reclaim")\
+ x(large_journal, bool, \
+ OPT_FS|OPT_MOUNT|OPT_FORMAT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Allocate a bigger than normal journal: recovery from unclean "\
+ "shutdown will be slower, but more info will be available for debugging")\
x(move_bytes_in_flight, u32, \
OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(1024, U32_MAX), \
@@ -384,6 +395,11 @@ enum fsck_err_opts {
OPT_UINT(0, U64_MAX), \
BCH2_NO_SB_OPT, 0, \
NULL, "Rewind journal") \
+ x(journal_rewind_no_extents, bool, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, 0, \
+ NULL, "Don't rewind extents when rewinding journal") \
x(recovery_passes, u64, \
OPT_FS|OPT_MOUNT, \
OPT_BITFIELD(bch2_recovery_passes), \
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 1c345b86..73b463c9 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -220,7 +220,7 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans,
return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, 0) ?:
- -BCH_ERR_transaction_restart_nested;
+ bch_err_throw(trans->c, transaction_restart_nested);
}
#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index d0b7e3a3..974f8bf9 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -1177,9 +1177,10 @@ int bch2_fs_initialize(struct bch_fs *c)
for_each_member_device(c, ca) {
m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false);
- ca->mi = bch2_mi_to_cpu(m);
}
+ bch2_sb_members_to_cpu(c);
+
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c
index c2c18c0a..c09ed2dd 100644
--- a/libbcachefs/recovery_passes.c
+++ b/libbcachefs/recovery_passes.c
@@ -313,6 +313,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
*/
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
+ bool rewind = in_recovery &&
+ r->curr_pass > pass &&
+ !(r->passes_complete & BIT_ULL(pass));
if (persistent
? !(c->sb.recovery_passes_required & BIT_ULL(pass))
@@ -323,6 +326,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
(r->passes_ratelimiting & BIT_ULL(pass)))
return true;
+ if (rewind)
+ return true;
+
return false;
}
@@ -337,7 +343,6 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
struct bch_fs_recovery *r = &c->recovery;
int ret = 0;
-
lockdep_assert_held(&c->sb_lock);
bch2_printbuf_make_room(out, 1024);
@@ -408,10 +413,8 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
{
int ret = 0;
- scoped_guard(mutex, &c->sb_lock) {
- if (!recovery_pass_needs_set(c, pass, &flags))
- return 0;
-
+ if (recovery_pass_needs_set(c, pass, &flags)) {
+ guard(mutex)(&c->sb_lock);
ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
bch2_write_super(c);
}
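The rewind flag covers the case where, while recovery is already running a later pass, something asks for an earlier pass that never completed; recovery_pass_needs_set() now returns true so the request isn't dropped. Schematically, with BCH_RECOVERY_PASS_check_allocations standing in for the requested pass (the surrounding values are illustrative only):

    enum bch_recovery_pass pass = BCH_RECOVERY_PASS_check_allocations;

    bool in_recovery = true;                              /* BCH_FS_in_recovery set */
    bool rewind = in_recovery &&
                  r->curr_pass > pass &&                  /* already past it        */
                  !(r->passes_complete & BIT_ULL(pass));  /* ...but it never ran    */

    /* rewind == true -> recovery_pass_needs_set() returns true */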
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index a535abd4..8d8e045b 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -64,6 +64,9 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
REFLINK_P_IDX(p.v),
le32_to_cpu(p.v->front_pad),
le32_to_cpu(p.v->back_pad));
+
+ if (REFLINK_P_ERROR(p.v))
+ prt_str(out, " error");
}
bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
@@ -164,7 +167,7 @@ static int bch2_indirect_extent_not_missing(struct btree_trans *trans, struct bk
return 0;
return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
+ bch_err_throw(trans->c, transaction_restart_nested);
}
static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
@@ -239,7 +242,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
if (should_commit)
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
+ bch_err_throw(c, transaction_restart_nested);
}
err:
fsck_err:
@@ -269,13 +272,12 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans,
return k;
if (unlikely(!bkey_extent_is_reflink_data(k.k))) {
- unsigned size = min((u64) k.k->size,
- REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) -
- reflink_offset);
- bch2_key_resize(&iter->k, size);
+ u64 missing_end = min(k.k->p.offset,
+ REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad));
+ BUG_ON(reflink_offset == missing_end);
int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset,
- k.k->p.offset, should_commit);
+ missing_end, should_commit);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return bkey_s_c_err(ret);
diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h
index bb1eddd6..dd4ee466 100644
--- a/libbcachefs/sb-errors_format.h
+++ b/libbcachefs/sb-errors_format.h
@@ -7,6 +7,7 @@ enum bch_fsck_flags {
FSCK_CAN_IGNORE = BIT(1),
FSCK_AUTOFIX = BIT(2),
FSCK_ERR_NO_LOG = BIT(3),
+ FSCK_ERR_SILENT = BIT(4),
};
#define BCH_SB_ERRS() \
@@ -157,6 +158,7 @@ enum bch_fsck_flags {
x(extent_ptrs_unwritten, 140, 0) \
x(extent_ptrs_written_and_unwritten, 141, 0) \
x(ptr_to_invalid_device, 142, 0) \
+ x(ptr_to_removed_device, 322, 0) \
x(ptr_to_duplicate_device, 143, 0) \
x(ptr_after_last_bucket, 144, 0) \
x(ptr_before_first_bucket, 145, 0) \
@@ -290,6 +292,7 @@ enum bch_fsck_flags {
x(inode_points_to_missing_dirent, 249, FSCK_AUTOFIX) \
x(inode_points_to_wrong_dirent, 250, FSCK_AUTOFIX) \
x(inode_bi_parent_nonzero, 251, 0) \
+ x(missing_inode_with_contents, 321, FSCK_AUTOFIX) \
x(dirent_to_missing_parent_subvol, 252, 0) \
x(dirent_not_visible_in_parent_subvol, 253, 0) \
x(subvol_fs_path_parent_wrong, 254, 0) \
@@ -301,7 +304,7 @@ enum bch_fsck_flags {
x(btree_node_bkey_bad_u64s, 260, 0) \
x(btree_node_topology_empty_interior_node, 261, 0) \
x(btree_ptr_v2_min_key_bad, 262, 0) \
- x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
x(snapshot_node_missing, 264, FSCK_AUTOFIX) \
x(dup_backpointer_to_bad_csum_extent, 265, 0) \
x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \
@@ -314,7 +317,7 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
x(accounting_to_invalid_device, 289, 0) \
- x(invalid_btree_id, 274, 0) \
+ x(invalid_btree_id, 274, FSCK_AUTOFIX) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \
@@ -331,7 +334,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 321, 0)
+ x(MAX, 323, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index 6245e342..f2abe92c 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -15,10 +15,15 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
- prt_printf(&buf, "pointer to nonexistent device %u in key\n", dev);
+ bool removed = test_bit(dev, c->devs_removed.d);
+
+ prt_printf(&buf, "pointer to %s device %u in key\n",
+ removed ? "removed" : "nonexistent", dev);
bch2_bkey_val_to_text(&buf, c, k);
- bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf);
+ bool print = removed
+ ? bch2_count_fsck_err(c, ptr_to_removed_device, &buf)
+ : bch2_count_fsck_err(c, ptr_to_invalid_device, &buf);
int ret = bch2_run_explicit_recovery_pass(c, &buf,
BCH_RECOVERY_PASS_check_allocations, 0);
@@ -32,7 +37,9 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
{
if (dev != BCH_SB_MEMBER_INVALID)
- bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
+ bch2_fs_inconsistent(c, "pointer to %s device %u",
+ test_bit(dev, c->devs_removed.d)
+ ? "removed" : "nonexistent", dev);
}
void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)
@@ -413,6 +420,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *c)
}
}
+void bch2_sb_members_to_cpu(struct bch_fs *c)
+{
+ for_each_member_device(c, ca) {
+ struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
+ ca->mi = bch2_mi_to_cpu(&m);
+ }
+
+ struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+ if (mi2)
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+ struct bch_member m = members_v2_get(mi2, i);
+ bool removed = uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID);
+ mod_bit(i, c->devs_removed.d, removed);
+ }
+}
+
void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h
index 5dcc2017..0d363a1c 100644
--- a/libbcachefs/sb-members.h
+++ b/libbcachefs/sb-members.h
@@ -365,6 +365,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
}
void bch2_sb_members_from_cpu(struct bch_fs *);
+void bch2_sb_members_to_cpu(struct bch_fs *);
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *);
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 38aeaa12..4c43d2a2 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -871,7 +871,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN,
0, k, ret) {
- if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
+ if (k.k->type == KEY_TYPE_snapshot_tree &&
+ le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
tree_id = k.k->p.offset;
break;
}
@@ -899,7 +900,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
0, k, ret) {
- if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
+ if (k.k->type == KEY_TYPE_subvolume &&
+ le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
snapshot->v.subvol = cpu_to_le32(k.k->p.offset);
SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true);
break;
diff --git a/libbcachefs/snapshot.h b/libbcachefs/snapshot.h
index 6766bf67..6dcb118b 100644
--- a/libbcachefs/snapshot.h
+++ b/libbcachefs/snapshot.h
@@ -128,7 +128,7 @@ static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
guard(rcu)();
const struct snapshot_t *s = snapshot_t(c, id);
- return s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
+ return s ? s->children[0] : bch_err_throw(c, invalid_snapshot_node);
}
static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
diff --git a/libbcachefs/str_hash.c b/libbcachefs/str_hash.c
index 3e9f5922..d39fd426 100644
--- a/libbcachefs/str_hash.c
+++ b/libbcachefs/str_hash.c
@@ -204,7 +204,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans,
}
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
+ bch_err_throw(c, transaction_restart_nested);
err:
fsck_err:
printbuf_exit(&buf);
@@ -292,7 +292,7 @@ int bch2_str_hash_repair_key(struct btree_trans *trans,
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_commit;
+ bch_err_throw(c, transaction_restart_commit);
} else {
duplicate_entries:
ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
@@ -326,7 +326,7 @@ duplicate_entries:
}
ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
- -BCH_ERR_transaction_restart_commit;
+ bch_err_throw(c, transaction_restart_commit);
}
out:
fsck_err:
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index a49376df..353a9278 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -48,9 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.inum_snapshot = bi->bi_snapshot,
.type = INODE_STR_HASH(bi),
-#if IS_ENABLED(CONFIG_UNICODE)
.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
-#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
@@ -177,7 +175,7 @@ bch2_hash_lookup_in_snapshot(struct btree_trans *trans,
}
bch2_trans_iter_exit(trans, iter);
- return bkey_s_c_err(ret ?: -BCH_ERR_ENOENT_str_hash_lookup);
+ return bkey_s_c_err(ret ?: bch_err_throw(trans->c, ENOENT_str_hash_lookup));
}
static __always_inline struct bkey_s_c
@@ -219,7 +217,7 @@ bch2_hash_hole(struct btree_trans *trans,
return 0;
bch2_trans_iter_exit(trans, iter);
- return ret ?: -BCH_ERR_ENOSPC_str_hash_create;
+ return ret ?: bch_err_throw(trans->c, ENOSPC_str_hash_create);
}
static __always_inline
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 02058744..353df662 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -70,7 +70,7 @@ static int check_subvol(struct btree_trans *trans,
if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
ret = bch2_subvolume_delete(trans, iter->pos.offset);
bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
- return ret ?: -BCH_ERR_transaction_restart_nested;
+ return ret ?: bch_err_throw(c, transaction_restart_nested);
}
if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
@@ -310,7 +310,7 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
bch2_trans_iter_exit(trans, &iter);
return bkey_err(k) ?: k.k && k.k->p.inode == subvol
- ? -BCH_ERR_ENOTEMPTY_subvol_not_empty
+ ? bch_err_throw(trans->c, ENOTEMPTY_subvol_not_empty)
: 0;
}
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 6c2e1d64..85e460d1 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -632,10 +632,7 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
}
- for_each_member_device(c, ca) {
- struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
- ca->mi = bch2_mi_to_cpu(&m);
- }
+ bch2_sb_members_to_cpu(c);
}
static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index b2fcae49..6980cd5b 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -1025,15 +1025,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
}
#if IS_ENABLED(CONFIG_UNICODE)
- /* Default encoding until we can potentially have more as an option. */
- c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
- if (IS_ERR(c->cf_encoding)) {
- printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
- ret = -EINVAL;
- goto err;
+ if (!bch2_fs_casefold_enabled(c)) {
+ /* Default encoding until we can potentially have more as an option. */
+ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
+ if (IS_ERR(c->cf_encoding)) {
+ printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+ ret = -EINVAL;
+ goto err;
+ }
}
#else
if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
@@ -1160,7 +1162,7 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
- if (IS_ENABLED(CONFIG_UNICODE))
+ if (c->cf_encoding)
bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 05848375..50cf5165 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -44,6 +44,7 @@
#include <linux/blkdev.h>
#include <linux/sort.h>
+#include <linux/string_choices.h>
#include <linux/sched/clock.h>
#include "util.h"
@@ -156,6 +157,7 @@ write_attribute(trigger_recalc_capacity);
write_attribute(trigger_delete_dead_snapshots);
write_attribute(trigger_emergency_read_only);
read_attribute(gc_gens_pos);
+__sysfs_attribute(read_fua_test, 0400);
read_attribute(uuid);
read_attribute(minor);
@@ -303,6 +305,116 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c)
prt_printf(out, "nr_inodes:\t%llu\n", b.nr_inodes);
}
+static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+ struct bio *bio = NULL;
+ void *buf = NULL;
+ unsigned bs = c->opts.block_size, iters;
+ u64 end, test_duration = NSEC_PER_SEC * 2;
+ struct bch2_time_stats stats_nofua, stats_fua, stats_random;
+ int ret = 0;
+
+ bch2_time_stats_init_no_pcpu(&stats_nofua);
+ bch2_time_stats_init_no_pcpu(&stats_fua);
+ bch2_time_stats_init_no_pcpu(&stats_random);
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, READ, BCH_DEV_READ_REF_read_fua_test)) {
+ prt_str(out, "offline\n");
+ return 0;
+ }
+
+ struct block_device *bdev = ca->disk_sb.bdev;
+
+ bio = bio_kmalloc(1, GFP_KERNEL);
+ if (!bio) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ buf = kmalloc(bs, GFP_KERNEL);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ end = ktime_get_ns() + test_duration;
+ for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) {
+ bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ);
+ bch2_bio_map(bio, buf, bs);
+
+ u64 submit_time = ktime_get_ns();
+ ret = submit_bio_wait(bio);
+ bch2_time_stats_update(&stats_nofua, submit_time);
+
+ if (ret)
+ goto err;
+ }
+
+ end = ktime_get_ns() + test_duration;
+ for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) {
+ bio_init(bio, bdev, bio->bi_inline_vecs, 1, REQ_FUA|READ);
+ bch2_bio_map(bio, buf, bs);
+
+ u64 submit_time = ktime_get_ns();
+ ret = submit_bio_wait(bio);
+ bch2_time_stats_update(&stats_fua, submit_time);
+
+ if (ret)
+ goto err;
+ }
+
+ u64 dev_size = ca->mi.nbuckets * bucket_bytes(ca);
+
+ end = ktime_get_ns() + test_duration;
+ for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) {
+ bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ);
+ bio->bi_iter.bi_sector = (bch2_get_random_u64_below(dev_size) & ~((u64) bs - 1)) >> 9;
+ bch2_bio_map(bio, buf, bs);
+
+ u64 submit_time = ktime_get_ns();
+ ret = submit_bio_wait(bio);
+ bch2_time_stats_update(&stats_random, submit_time);
+
+ if (ret)
+ goto err;
+ }
+
+ u64 ns_nofua = mean_and_variance_get_mean(stats_nofua.duration_stats);
+ u64 ns_fua = mean_and_variance_get_mean(stats_fua.duration_stats);
+ u64 ns_rand = mean_and_variance_get_mean(stats_random.duration_stats);
+
+ u64 stddev_nofua = mean_and_variance_get_stddev(stats_nofua.duration_stats);
+ u64 stddev_fua = mean_and_variance_get_stddev(stats_fua.duration_stats);
+ u64 stddev_rand = mean_and_variance_get_stddev(stats_random.duration_stats);
+
+ printbuf_tabstop_push(out, 8);
+ printbuf_tabstop_push(out, 12);
+ printbuf_tabstop_push(out, 12);
+ prt_printf(out, "This test must be run on an idle drive for accurate results\n");
+ prt_printf(out, "%s\n", dev_name(&ca->disk_sb.bdev->bd_device));
+ prt_printf(out, "fua support advertized: %s\n", str_yes_no(bdev_fua(bdev)));
+ prt_newline(out);
+ prt_printf(out, "ns:\tlatency\rstddev\r\n");
+ prt_printf(out, "nofua\t%llu\r%llu\r\n", ns_nofua, stddev_nofua);
+ prt_printf(out, "fua\t%llu\r%llu\r\n", ns_fua, stddev_fua);
+ prt_printf(out, "random\t%llu\r%llu\r\n", ns_rand, stddev_rand);
+
+ bool read_cache = ns_nofua * 2 < ns_rand;
+ bool fua_cached = read_cache && ns_fua < (ns_nofua + ns_rand) / 2;
+
+ if (!read_cache)
+ prt_str(out, "reads don't appear to be cached - safe\n");
+ else if (!fua_cached)
+ prt_str(out, "fua reads don't appear to be cached - safe\n");
+ else
+ prt_str(out, "fua reads appear to be cached - unsafe\n");
+err:
+ kfree(buf);
+ kfree(bio);
+ enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_read_fua_test);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
SHOW(bch2_fs)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
@@ -840,6 +952,9 @@ SHOW(bch2_dev)
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c, ca);
+ if (attr == &sysfs_read_fua_test)
+ return bch2_read_fua_test(out, ca);
+
int opt_id = bch2_opt_lookup(attr->name);
if (opt_id >= 0)
return sysfs_opt_show(c, ca, opt_id, out);
@@ -902,6 +1017,8 @@ struct attribute *bch2_dev_files[] = {
&sysfs_io_latency_stats_write,
&sysfs_congested,
+ &sysfs_read_fua_test,
+
/* debug: */
&sysfs_alloc_debug,
&sysfs_open_buckets,
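The verdict printed by bch2_read_fua_test() is a heuristic over the three mean latencies: repeated reads of one block that come back much faster than random reads are presumably served from the drive's cache, and if FUA reads stay close to that cached figure rather than to the random one, FUA evidently isn't bypassing the cache. With some hypothetical numbers:

    /* hypothetical latencies, in nanoseconds */
    u64 ns_nofua = 20000, ns_fua = 25000, ns_rand = 120000;

    bool read_cache = ns_nofua * 2 < ns_rand;                           /* true */
    bool fua_cached = read_cache && ns_fua < (ns_nofua + ns_rand) / 2;  /* true */

    /* -> "fua reads appear to be cached - unsafe" */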
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index 9c5a9c55..9324ef32 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -92,58 +92,6 @@ DECLARE_EVENT_CLASS(trans_str_nocaller,
__entry->trans_fn, __get_str(str))
);
-DECLARE_EVENT_CLASS(btree_node_nofs,
- TP_PROTO(struct bch_fs *c, struct btree *b),
- TP_ARGS(c, b),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(u8, level )
- __field(u8, btree_id )
- TRACE_BPOS_entries(pos)
- ),
-
- TP_fast_assign(
- __entry->dev = c->dev;
- __entry->level = b->c.level;
- __entry->btree_id = b->c.btree_id;
- TRACE_BPOS_assign(pos, b->key.k.p);
- ),
-
- TP_printk("%d,%d %u %s %llu:%llu:%u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->level,
- bch2_btree_id_str(__entry->btree_id),
- __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
-);
-
-DECLARE_EVENT_CLASS(btree_node,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __array(char, trans_fn, 32 )
- __field(u8, level )
- __field(u8, btree_id )
- TRACE_BPOS_entries(pos)
- ),
-
- TP_fast_assign(
- __entry->dev = trans->c->dev;
- strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
- __entry->level = b->c.level;
- __entry->btree_id = b->c.btree_id;
- TRACE_BPOS_assign(pos, b->key.k.p);
- ),
-
- TP_printk("%d,%d %s %u %s %llu:%llu:%u",
- MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn,
- __entry->level,
- bch2_btree_id_str(__entry->btree_id),
- __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
-);
-
DECLARE_EVENT_CLASS(bch_fs,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c),
@@ -527,9 +475,9 @@ TRACE_EVENT(btree_cache_scan,
__entry->nr_to_scan, __entry->can_free, __entry->ret)
);
-DEFINE_EVENT(btree_node_nofs, btree_cache_reap,
- TP_PROTO(struct bch_fs *c, struct btree *b),
- TP_ARGS(c, b)
+DEFINE_EVENT(fs_str, btree_cache_reap,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock_fail,
@@ -554,39 +502,24 @@ DEFINE_EVENT(btree_trans, btree_cache_cannibalize_unlock,
/* Btree */
-DEFINE_EVENT(btree_node, btree_node_read,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_read,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
-TRACE_EVENT(btree_node_write,
- TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors),
- TP_ARGS(b, bytes, sectors),
-
- TP_STRUCT__entry(
- __field(enum btree_node_type, type)
- __field(unsigned, bytes )
- __field(unsigned, sectors )
- ),
-
- TP_fast_assign(
- __entry->type = btree_node_type(b);
- __entry->bytes = bytes;
- __entry->sectors = sectors;
- ),
-
- TP_printk("bkey type %u bytes %u sectors %u",
- __entry->type , __entry->bytes, __entry->sectors)
+DEFINE_EVENT(fs_str, btree_node_write,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
-DEFINE_EVENT(btree_node, btree_node_alloc,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_alloc,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
-DEFINE_EVENT(btree_node, btree_node_free,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_free,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
TRACE_EVENT(btree_reserve_get_fail,
@@ -617,29 +550,29 @@ TRACE_EVENT(btree_reserve_get_fail,
__entry->ret)
);
-DEFINE_EVENT(btree_node, btree_node_compact,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_set_root,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
-DEFINE_EVENT(btree_node, btree_node_merge,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_rewrite,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
-DEFINE_EVENT(btree_node, btree_node_split,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_merge,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
-DEFINE_EVENT(btree_node, btree_node_rewrite,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_compact,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
-DEFINE_EVENT(btree_node, btree_node_set_root,
- TP_PROTO(struct btree_trans *trans, struct btree *b),
- TP_ARGS(trans, b)
+DEFINE_EVENT(fs_str, btree_node_split,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
TRACE_EVENT(btree_path_relock_fail,
@@ -1397,6 +1330,11 @@ DEFINE_EVENT(fs_str, data_update,
TP_ARGS(c, str)
);
+DEFINE_EVENT(fs_str, data_update_done_no_rw_devs,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
DEFINE_EVENT(fs_str, io_move_pred,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index df9a6071..7a4436fd 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -299,17 +299,12 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne
if (ret)
return ret;
- if (!down_read_trylock(&task->signal->exec_update_lock))
- return -1;
-
do {
nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
} while (nr_entries == stack->size &&
!(ret = darray_make_room_gfp(stack, stack->size * 2, gfp)));
stack->nr = nr_entries;
- up_read(&task->signal->exec_update_lock);
-
return ret;
#else
return 0;
@@ -617,17 +612,10 @@ void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_contro
void bch2_bio_map(struct bio *bio, void *base, size_t size)
{
- while (size) {
- struct page *page = is_vmalloc_addr(base)
- ? vmalloc_to_page(base)
- : virt_to_page(base);
- unsigned offset = offset_in_page(base);
- unsigned len = min_t(size_t, PAGE_SIZE - offset, size);
-
- BUG_ON(!bio_add_page(bio, page, len, offset));
- size -= len;
- base += len;
- }
+ if (is_vmalloc_addr(base))
+ bio_add_vmalloc(bio, base, size);
+ else
+ bio_add_virt_nofail(bio, base, size);
}
int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
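bch2_bio_map() keeps the same calling convention, a contiguous kmalloc'd or vmalloc'd buffer attached to an already-initialized bio, and now simply defers to the block layer's helpers for the two cases. A caller sketch mirroring the read_fua_test code added earlier in this diff (bdev and the 4096-byte size are placeholders, and error handling is omitted):

    void *buf = kmalloc(4096, GFP_KERNEL);
    struct bio *bio = bio_kmalloc(1, GFP_KERNEL);    /* room for a single segment */

    bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ);
    bch2_bio_map(bio, buf, 4096);                    /* attach the whole buffer   */
    int ret = submit_bio_wait(bio);                  /* synchronous read          */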