summary refs log tree commit diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2025-06-13 23:42:59 -0400
committer Kent Overstreet <kent.overstreet@linux.dev> 2025-06-13 23:42:59 -0400
commit 5b9b063bb71cb1d4dfe124ca886681682a7ba416 (patch)
tree b9fdd1edbadbb2ac6102d92303671cc9d6b0968b
parent 792222046d80bd8715a3ed59faf9e5d5bda58417 (diff)
Update bcachefs sources to 54de30ef9de8 bcachefs: Kill unused tracepoints
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- libbcachefs/bcachefs.h 2
-rw-r--r-- libbcachefs/btree_gc.c 3
-rw-r--r-- libbcachefs/btree_io.c 16
-rw-r--r-- libbcachefs/btree_iter.c 169
-rw-r--r-- libbcachefs/btree_journal_iter.c 69
-rw-r--r-- libbcachefs/btree_journal_iter_types.h 5
-rw-r--r-- libbcachefs/btree_locking.h 6
-rw-r--r-- libbcachefs/btree_trans_commit.c 18
-rw-r--r-- libbcachefs/btree_types.h 1
-rw-r--r-- libbcachefs/btree_update_interior.c 31
-rw-r--r-- libbcachefs/btree_write_buffer.c 3
-rw-r--r-- libbcachefs/dirent.c 2
-rw-r--r-- libbcachefs/errcode.h 5
-rw-r--r-- libbcachefs/extent_update.c 13
-rw-r--r-- libbcachefs/fs.c 4
-rw-r--r-- libbcachefs/fsck.c 55
-rw-r--r-- libbcachefs/inode.c 2
-rw-r--r-- libbcachefs/journal.c 18
-rw-r--r-- libbcachefs/journal.h 2
-rw-r--r-- libbcachefs/journal_io.c 21
-rw-r--r-- libbcachefs/namei.c 10
-rw-r--r-- libbcachefs/opts.h 5
-rw-r--r-- libbcachefs/recovery.c 11
-rw-r--r-- libbcachefs/sb-errors_format.h 4
-rw-r--r-- libbcachefs/sb-members.c 34
-rw-r--r-- libbcachefs/snapshot.c 8
-rw-r--r-- libbcachefs/str_hash.h 2
-rw-r--r-- libbcachefs/super.c 35
-rw-r--r-- libbcachefs/trace.h 125
30 files changed, 411 insertions, 270 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index e4007c01..4a0284df 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-5e40e376e242076ba1148c100993e99f339c2391
+54de30ef9de88c274997d8f78d21297d356c7bc0
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index d0d3a686..5a1cede2 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -862,7 +862,7 @@ struct bch_fs {
DARRAY(enum bcachefs_metadata_version)
incompat_versions_requested;
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
struct unicode_map *cf_encoding;
#endif
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index ebd2be54..e92cf392 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -442,7 +442,8 @@ again:
if (!ret && !IS_ERR_OR_NULL(prev)) {
BUG_ON(cur);
- ret = btree_repair_node_end(trans, b, prev, pulled_from_scan);
+ ret = lockrestart_do(trans,
+ btree_repair_node_end(trans, b, prev, pulled_from_scan));
if (ret == DID_FILL_FROM_SCAN) {
new_pass = true;
ret = 0;
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 6787d5b9..d8f3c4c6 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -741,16 +741,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
BCH_VERSION_MAJOR(version),
BCH_VERSION_MINOR(version));
- if (btree_err_on(version < c->sb.version_min,
+ if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes &&
+ btree_err_on(version < c->sb.version_min,
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, NULL,
btree_node_bset_older_than_sb_min,
"bset version %u older than superblock version_min %u",
version, c->sb.version_min)) {
- mutex_lock(&c->sb_lock);
- c->disk_sb.sb->version_min = cpu_to_le16(version);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ if (bch2_version_compatible(version)) {
+ mutex_lock(&c->sb_lock);
+ c->disk_sb.sb->version_min = cpu_to_le16(version);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ } else {
+ /* We have no idea what's going on: */
+ i->version = cpu_to_le16(c->sb.version);
+ }
}
if (btree_err_on(BCH_VERSION_MAJOR(version) >
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index b7840337..06160399 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2076,14 +2076,14 @@ inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter
static noinline
void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter,
- struct bkey_s_c *k)
+ struct bpos search_key, struct bkey_s_c *k)
{
struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key;
trans_for_each_update(trans, i)
if (!i->key_cache_already_flushed &&
i->btree_id == iter->btree_id &&
- bpos_le(i->k->k.p, iter->pos) &&
+ bpos_le(i->k->k.p, search_key) &&
bpos_ge(i->k->k.p, k->k ? k->k->p : end)) {
iter->k = i->k->k;
*k = bkey_i_to_s_c(i->k);
@@ -2092,6 +2092,7 @@ void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_
static noinline
void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter,
+ struct bpos search_key,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
@@ -2100,7 +2101,7 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter
trans_for_each_update(trans, i)
if (!i->key_cache_already_flushed &&
i->btree_id == iter->btree_id &&
- bpos_ge(i->k->k.p, path->pos) &&
+ bpos_ge(i->k->k.p, search_key) &&
bpos_le(i->k->k.p, k->k ? k->k->p : end)) {
iter->k = i->k->k;
*k = bkey_i_to_s_c(i->k);
@@ -2122,13 +2123,14 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_
static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_pos,
struct bpos end_pos)
{
struct btree_path *path = btree_iter_path(trans, iter);
return bch2_journal_keys_peek_max(trans->c, iter->btree_id,
path->level,
- path->pos,
+ search_pos,
end_pos,
&iter->journal_idx);
}
@@ -2138,7 +2140,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
struct btree_iter *iter)
{
struct btree_path *path = btree_iter_path(trans, iter);
- struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos);
+ struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos);
if (k) {
iter->k = k->k;
@@ -2151,11 +2153,12 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
static noinline
void btree_trans_peek_journal(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_key,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
- bch2_btree_journal_peek(trans, iter,
+ bch2_btree_journal_peek(trans, iter, search_key,
k->k ? k->k->p : path_l(path)->b->key.k.p);
if (next_journal) {
iter->k = next_journal->k;
@@ -2165,13 +2168,14 @@ void btree_trans_peek_journal(struct btree_trans *trans,
static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_key,
struct bpos end_pos)
{
struct btree_path *path = btree_iter_path(trans, iter);
return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id,
path->level,
- path->pos,
+ search_key,
end_pos,
&iter->journal_idx);
}
@@ -2179,11 +2183,12 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
static noinline
void btree_trans_peek_prev_journal(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_key,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
- bch2_btree_journal_peek_prev(trans, iter,
+ bch2_btree_journal_peek_prev(trans, iter, search_key,
k->k ? k->k->p : path_l(path)->b->key.k.p);
if (next_journal) {
@@ -2292,11 +2297,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct
}
if (unlikely(iter->flags & BTREE_ITER_with_journal))
- btree_trans_peek_journal(trans, iter, &k);
+ btree_trans_peek_journal(trans, iter, search_key, &k);
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
trans->nr_updates))
- bch2_btree_trans_peek_updates(trans, iter, &k);
+ bch2_btree_trans_peek_updates(trans, iter, search_key, &k);
if (k.k && bkey_deleted(k.k)) {
/*
@@ -2326,6 +2331,20 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct
}
bch2_btree_iter_verify(trans, iter);
+
+ if (trace___btree_iter_peek_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace___btree_iter_peek(trans->c, buf.buf);
+ }
+
return k;
}
@@ -2484,6 +2503,19 @@ out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
+ if (trace_btree_iter_peek_max_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace_btree_iter_peek_max(trans->c, buf.buf);
+ }
+
return k;
end:
bch2_btree_iter_set_pos(trans, iter, end);
@@ -2557,11 +2589,11 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st
}
if (unlikely(iter->flags & BTREE_ITER_with_journal))
- btree_trans_peek_prev_journal(trans, iter, &k);
+ btree_trans_peek_prev_journal(trans, iter, search_key, &k);
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
trans->nr_updates))
- bch2_btree_trans_peek_prev_updates(trans, iter, &k);
+ bch2_btree_trans_peek_prev_updates(trans, iter, search_key, &k);
if (likely(k.k && !bkey_deleted(k.k))) {
break;
@@ -2724,6 +2756,19 @@ out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(trans, iter);
+
+ if (trace_btree_iter_peek_prev_min_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace_btree_iter_peek_prev_min(trans->c, buf.buf);
+ }
return k;
end:
bch2_btree_iter_set_pos(trans, iter, end);
@@ -2767,8 +2812,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
/* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_is_extents) &&
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
- if (iter->pos.inode == KEY_INODE_MAX)
- return bkey_s_c_null;
+ if (iter->pos.inode == KEY_INODE_MAX) {
+ k = bkey_s_c_null;
+ goto out2;
+ }
bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos));
}
@@ -2785,8 +2832,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
}
struct btree_path *path = btree_iter_path(trans, iter);
- if (unlikely(!btree_path_node(path, path->level)))
- return bkey_s_c_null;
+ if (unlikely(!btree_path_node(path, path->level))) {
+ k = bkey_s_c_null;
+ goto out2;
+ }
btree_path_set_should_be_locked(trans, path);
@@ -2879,7 +2928,20 @@ out:
bch2_btree_iter_verify(trans, iter);
ret = bch2_btree_iter_verify_ret(trans, iter, k);
if (unlikely(ret))
- return bkey_s_c_err(ret);
+ k = bkey_s_c_err(ret);
+out2:
+ if (trace_btree_iter_peek_slot_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace_btree_iter_peek_slot(trans->c, buf.buf);
+ }
return k;
}
@@ -3159,46 +3221,32 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
mutex_unlock(&s->lock);
}
- if (trans->used_mempool) {
- if (trans->mem_bytes >= new_bytes)
- goto out_change_top;
-
- /* No more space from mempool item, need malloc new one */
- new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
- if (unlikely(!new_mem)) {
- bch2_trans_unlock(trans);
-
- new_mem = kmalloc(new_bytes, GFP_KERNEL);
- if (!new_mem)
- return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+ if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) {
+ EBUG_ON(trans->mem_bytes >= new_bytes);
+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+ }
- ret = bch2_trans_relock(trans);
- if (ret) {
- kfree(new_mem);
- return ERR_PTR(ret);
- }
- }
- memcpy(new_mem, trans->mem, trans->mem_top);
- trans->used_mempool = false;
- mempool_free(trans->mem, &c->btree_trans_mem_pool);
- goto out_new_mem;
+ if (old_bytes) {
+ trans->realloc_bytes_required = new_bytes;
+ trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
+ return ERR_PTR(btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
- new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ EBUG_ON(trans->mem);
+
+ new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
if (unlikely(!new_mem)) {
bch2_trans_unlock(trans);
- new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
+ new_mem = kmalloc(new_bytes, GFP_KERNEL);
if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
new_bytes = BTREE_TRANS_MEM_MAX;
- memcpy(new_mem, trans->mem, trans->mem_top);
trans->used_mempool = true;
- kfree(trans->mem);
}
- if (!new_mem)
- return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+ EBUG_ON(!new_mem);
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
@@ -3207,18 +3255,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
if (ret)
return ERR_PTR(ret);
}
-out_new_mem:
+
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
- if (old_bytes) {
- trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
- return ERR_PTR(btree_trans_restart_ip(trans,
- BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
- }
-out_change_top:
- bch2_trans_kmalloc_trace(trans, size, ip);
-
p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);
@@ -3279,6 +3319,27 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->restart_count++;
trans->mem_top = 0;
+ if (trans->restarted == BCH_ERR_transaction_restart_mem_realloced) {
+ EBUG_ON(!trans->mem || !trans->mem_bytes);
+ unsigned new_bytes = trans->realloc_bytes_required;
+ void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ if (unlikely(!new_mem)) {
+ bch2_trans_unlock(trans);
+ new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
+
+ EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX);
+
+ if (!new_mem) {
+ new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
+ new_bytes = BTREE_TRANS_MEM_MAX;
+ trans->used_mempool = true;
+ kfree(trans->mem);
+ }
+ }
+ trans->mem = new_mem;
+ trans->mem_bytes = new_bytes;
+ }
+
trans_for_each_path(trans, path, i) {
path->should_be_locked = false;
diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c
index cf739875..8f94c3bd 100644
--- a/libbcachefs/btree_journal_iter.c
+++ b/libbcachefs/btree_journal_iter.c
@@ -141,8 +141,8 @@ search:
if (!*idx)
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
- while (*idx &&
- __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
+ while (*idx < keys->nr &&
+ __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) >= 0) {
(*idx)++;
iters++;
if (iters == 10) {
@@ -641,10 +641,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
{
const struct journal_key *l = _l;
const struct journal_key *r = _r;
+ int rewind = l->rewind && r->rewind ? -1 : 1;
return journal_key_cmp(l, r) ?:
- cmp_int(l->journal_seq, r->journal_seq) ?:
- cmp_int(l->journal_offset, r->journal_offset);
+ ((cmp_int(l->journal_seq, r->journal_seq) ?:
+ cmp_int(l->journal_offset, r->journal_offset)) * rewind);
}
void bch2_journal_keys_put(struct bch_fs *c)
@@ -712,37 +713,59 @@ int bch2_journal_keys_sort(struct bch_fs *c)
struct journal_replay *i, **_i;
struct journal_keys *keys = &c->journal_keys;
size_t nr_read = 0;
+ u64 nr_entries = 0, nr_non_ignored = 0;
+
+ u64 rewind_seq = c->opts.journal_rewind ?: U64_MAX;
genradix_for_each(&c->journal_entries, iter, _i) {
i = *_i;
+ nr_entries += i != NULL;
+
if (journal_replay_ignore(i))
continue;
+ nr_non_ignored++;
+
cond_resched();
- for_each_jset_key(k, entry, &i->j) {
- struct journal_key n = (struct journal_key) {
- .btree_id = entry->btree_id,
- .level = entry->level,
- .k = k,
- .journal_seq = le64_to_cpu(i->j.seq),
- .journal_offset = k->_data - i->j._data,
- };
-
- if (darray_push(keys, n)) {
- __journal_keys_sort(keys);
-
- if (keys->nr * 8 > keys->size * 7) {
- bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
- keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
- return bch_err_throw(c, ENOMEM_journal_keys_sort);
+ vstruct_for_each(&i->j, entry) {
+ bool rewind = !entry->level &&
+ !btree_id_is_alloc(entry->btree_id) &&
+ le64_to_cpu(i->j.seq) >= rewind_seq;
+
+ if (entry->type != (rewind
+ ? BCH_JSET_ENTRY_overwrite
+ : BCH_JSET_ENTRY_btree_keys))
+ continue;
+
+ if (!rewind && le64_to_cpu(i->j.seq) < c->journal_replay_seq_start)
+ continue;
+
+ jset_entry_for_each_key(entry, k) {
+ struct journal_key n = (struct journal_key) {
+ .btree_id = entry->btree_id,
+ .level = entry->level,
+ .rewind = rewind,
+ .k = k,
+ .journal_seq = le64_to_cpu(i->j.seq),
+ .journal_offset = k->_data - i->j._data,
+ };
+
+ if (darray_push(keys, n)) {
+ __journal_keys_sort(keys);
+
+ if (keys->nr * 8 > keys->size * 7) {
+ bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
+ keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
+ return bch_err_throw(c, ENOMEM_journal_keys_sort);
+ }
+
+ BUG_ON(darray_push(keys, n));
}
- BUG_ON(darray_push(keys, n));
+ nr_read++;
}
-
- nr_read++;
}
}
diff --git a/libbcachefs/btree_journal_iter_types.h b/libbcachefs/btree_journal_iter_types.h
index 8b773823..86aacb25 100644
--- a/libbcachefs/btree_journal_iter_types.h
+++ b/libbcachefs/btree_journal_iter_types.h
@@ -11,8 +11,9 @@ struct journal_key {
u32 journal_offset;
enum btree_id btree_id:8;
unsigned level:8;
- bool allocated;
- bool overwritten;
+ bool allocated:1;
+ bool overwritten:1;
+ bool rewind:1;
struct journal_key_range_overwritten __rcu *
overwritten_range;
struct bkey_i *k;
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index 9adca77e..f2173a33 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -417,8 +417,10 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st
EBUG_ON(!btree_node_locked(path, path->level));
EBUG_ON(path->uptodate);
- path->should_be_locked = true;
- trace_btree_path_should_be_locked(trans, path);
+ if (!path->should_be_locked) {
+ path->should_be_locked = true;
+ trace_btree_path_should_be_locked(trans, path);
+ }
}
static inline void __btree_path_set_level_up(struct btree_trans *trans,
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index d9710801..639ef75b 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -595,12 +595,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
int ret = 0;
bch2_trans_verify_not_unlocked_or_in_restart(trans);
-
+#if 0
+ /* todo: bring back dynamic fault injection */
if (race_fault()) {
trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
}
-
+#endif
/*
* Check if the insert will fit in the leaf node with the write lock
* held, otherwise another thread could write the node changing the
@@ -757,6 +758,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
btree_trans_journal_entries_start(trans),
trans->journal_entries.u64s);
+ EBUG_ON(trans->journal_res.u64s < trans->journal_entries.u64s);
+
trans->journal_res.offset += trans->journal_entries.u64s;
trans->journal_res.u64s -= trans->journal_entries.u64s;
@@ -1003,6 +1006,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
{
struct btree_insert_entry *errored_at = NULL;
struct bch_fs *c = trans->c;
+ unsigned journal_u64s = 0;
int ret = 0;
bch2_trans_verify_not_unlocked_or_in_restart(trans);
@@ -1031,10 +1035,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
- trans->journal_u64s = trans->journal_entries.u64s + jset_u64s(trans->accounting.u64s);
+ journal_u64s = jset_u64s(trans->accounting.u64s);
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
if (trans->journal_transaction_names)
- trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
+ journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
trans_for_each_update(trans, i) {
struct btree_path *path = trans->paths + i->path;
@@ -1054,11 +1058,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
continue;
/* we're going to journal the key being updated: */
- trans->journal_u64s += jset_u64s(i->k->k.u64s);
+ journal_u64s += jset_u64s(i->k->k.u64s);
/* and we're also going to log the overwrite: */
if (trans->journal_transaction_names)
- trans->journal_u64s += jset_u64s(i->old_k.u64s);
+ journal_u64s += jset_u64s(i->old_k.u64s);
}
if (trans->extra_disk_res) {
@@ -1076,6 +1080,8 @@ retry:
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
+ trans->journal_u64s = journal_u64s + trans->journal_entries.u64s;
+
ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_);
/* make sure we didn't drop or screw up locks: */
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 3aa4a602..112170fd 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -497,6 +497,7 @@ struct btree_trans {
void *mem;
unsigned mem_top;
unsigned mem_bytes;
+ unsigned realloc_bytes_required;
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 9e281d18..e7758460 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -290,6 +290,8 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct write_point *wp;
struct btree *b;
+ BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+ struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
@@ -308,8 +310,8 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
- bkey_copy(&b->key, &a->k);
- b->ob = a->ob;
+ obs = a->ob;
+ bkey_copy(&tmp.k, &a->k);
mutex_unlock(&c->btree_reserve_cache_lock);
goto out;
}
@@ -343,12 +345,14 @@ retry:
goto retry;
}
- bkey_btree_ptr_v2_init(&b->key);
- bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false);
+ bkey_btree_ptr_v2_init(&tmp.k);
+ bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false);
- bch2_open_bucket_get(c, wp, &b->ob);
+ bch2_open_bucket_get(c, wp, &obs);
bch2_alloc_sectors_done(c, wp);
out:
+ bkey_copy(&b->key, &tmp.k);
+ b->ob = obs;
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@@ -509,25 +513,30 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
unsigned flags,
struct closure *cl)
{
+ struct btree *b;
+ unsigned interior;
+ int ret = 0;
+
BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
/*
* Protects reaping from the btree node cache and using the btree node
* open bucket reserve:
*/
- int ret = bch2_btree_cache_cannibalize_lock(trans, cl);
+ ret = bch2_btree_cache_cannibalize_lock(trans, cl);
if (ret)
return ret;
- for (unsigned interior = 0; interior < 2; interior++) {
+ for (interior = 0; interior < 2; interior++) {
struct prealloc_nodes *p = as->prealloc_nodes + interior;
while (p->nr < nr_nodes[interior]) {
- struct btree *b = __bch2_btree_node_alloc(trans, &as->disk_res,
- cl, interior, target, flags);
- ret = PTR_ERR_OR_ZERO(b);
- if (ret)
+ b = __bch2_btree_node_alloc(trans, &as->disk_res, cl,
+ interior, target, flags);
+ if (IS_ERR(b)) {
+ ret = PTR_ERR(b);
goto err;
+ }
p->b[p->nr++] = b;
}
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 90b21e61..21b5c03d 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -676,6 +676,9 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
goto err;
bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+
+ /* can we avoid the unconditional restart? */
+ trace_and_count(c, trans_restart_write_buffer_flush, trans, _RET_IP_);
ret = bch_err_throw(c, transaction_restart_write_buffer_flush);
}
err:
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 5c3df997..300f7cc8 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -18,7 +18,7 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
int ret = PTR_ERR_OR_ZERO(buf);
if (ret)
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index ac326413..86a842f1 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -137,7 +137,6 @@
x(BCH_ERR_transaction_restart, transaction_restart_relock) \
x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \
x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \
- x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \
x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \
x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \
x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \
@@ -148,11 +147,8 @@
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\
x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\
x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \
- x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \
- x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\
- x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \
x(BCH_ERR_transaction_restart, transaction_restart_split_race) \
x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \
x(BCH_ERR_transaction_restart, transaction_restart_nested) \
@@ -241,7 +237,6 @@
x(BCH_ERR_journal_res_blocked, journal_buf_enomem) \
x(BCH_ERR_journal_res_blocked, journal_stuck) \
x(BCH_ERR_journal_res_blocked, journal_retry_open) \
- x(BCH_ERR_journal_res_blocked, journal_preres_get_blocked) \
x(BCH_ERR_journal_res_blocked, bucket_alloc_blocked) \
x(BCH_ERR_journal_res_blocked, stripe_alloc_blocked) \
x(BCH_ERR_invalid, invalid_sb) \
diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c
index b899ee75..e76e58a5 100644
--- a/libbcachefs/extent_update.c
+++ b/libbcachefs/extent_update.c
@@ -139,6 +139,17 @@ int bch2_extent_trim_atomic(struct btree_trans *trans,
if (ret)
return ret;
- bch2_cut_back(end, k);
+ /* tracepoint */
+
+ if (bpos_lt(end, k->k.p)) {
+ if (trace_extent_trim_atomic_enabled()) {
+ CLASS(printbuf, buf)();
+ bch2_bpos_to_text(&buf, end);
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k));
+ trace_extent_trim_atomic(trans->c, buf.buf);
+ }
+ bch2_cut_back(end, k);
+ }
return 0;
}
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 40b633cd..3063a8dd 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -722,7 +722,7 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
if (IS_ERR(inode))
inode = NULL;
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
if (!inode && IS_CASEFOLDED(vdir)) {
/*
* Do not cache a negative dentry in casefolded directories
@@ -2564,7 +2564,7 @@ got_sb:
sb->s_shrink->seeks = 0;
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
sb->s_encoding = c->cf_encoding;
#endif
generic_set_sb_d_ops(sb);
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 7736103f..48810a8e 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -806,7 +806,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
if (!n->whiteout) {
return bch2_inode_unpack(inode, &n->inode);
} else {
- n->inode.bi_inum = inode.k->p.inode;
+ n->inode.bi_inum = inode.k->p.offset;
n->inode.bi_snapshot = inode.k->p.snapshot;
return 0;
}
@@ -903,17 +903,15 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str
w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
(bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
- struct bch_inode_unpacked new = i->inode;
- struct bkey_i whiteout;
-
- new.bi_snapshot = k.k->p.snapshot;
-
if (!i->whiteout) {
+ struct bch_inode_unpacked new = i->inode;
+ new.bi_snapshot = k.k->p.snapshot;
ret = __bch2_fsck_write_inode(trans, &new);
} else {
+ struct bkey_i whiteout;
bkey_init(&whiteout.k);
whiteout.k.type = KEY_TYPE_whiteout;
- whiteout.k.p = SPOS(0, i->inode.bi_inum, i->inode.bi_snapshot);
+ whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot);
ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
&whiteout,
BTREE_UPDATE_internal_snapshot_node);
@@ -1822,18 +1820,39 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
!key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
- if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+ u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9;
+
+ if (fsck_err_on(k.k->p.offset > last_block &&
!bkey_extent_is_reservation(k),
trans, extent_past_end_of_inode,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- struct btree_iter iter2;
+ struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout));
+ ret = PTR_ERR_OR_ZERO(whiteout);
+ if (ret)
+ goto err;
+
+ bkey_init(&whiteout->k);
+ whiteout->k.p = SPOS(k.k->p.inode,
+ last_block,
+ i->inode.bi_snapshot);
+ bch2_key_resize(&whiteout->k,
+ min(KEY_SIZE_MAX & (~0 << c->block_bits),
+ U64_MAX - whiteout->k.p.offset));
+
- bch2_trans_copy_iter(trans, &iter2, iter);
- bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot);
+ /*
+ * Need a normal (not BTREE_ITER_all_snapshots)
+ * iterator, if we're deleting in a different
+ * snapshot and need to emit a whiteout
+ */
+ struct btree_iter iter2;
+ bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents,
+ bkey_start_pos(&whiteout->k),
+ BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(trans, &iter2) ?:
- bch2_btree_delete_at(trans, &iter2,
+ bch2_trans_update(trans, &iter2, whiteout,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &iter2);
if (ret)
@@ -2184,7 +2203,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL;
#endif
@@ -2474,6 +2493,14 @@ int bch2_check_root(struct bch_fs *c)
return ret;
}
+static bool darray_u32_has(darray_u32 *d, u32 v)
+{
+ darray_for_each(*d, i)
+ if (*i == v)
+ return true;
+ return false;
+}
+
static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
@@ -2501,7 +2528,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
u32 parent = le32_to_cpu(s.v->fs_path_parent);
- if (darray_find(subvol_path, parent)) {
+ if (darray_u32_has(&subvol_path, parent)) {
printbuf_reset(&buf);
prt_printf(&buf, "subvolume loop:\n");
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index ceed2eb7..53e5dc1f 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -1265,7 +1265,7 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
{
struct bch_fs *c = trans->c;
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index dda802a6..df71af00 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1474,14 +1474,13 @@ void bch2_fs_journal_stop(struct journal *j)
clear_bit(JOURNAL_running, &j->flags);
}
-int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
+int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_entry_pin_list *p;
struct journal_replay *i, **_i;
struct genradix_iter iter;
bool had_entries = false;
- u64 last_seq = cur_seq, nr, seq;
/*
*
@@ -1495,17 +1494,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
return -EINVAL;
}
- genradix_for_each_reverse(&c->journal_entries, iter, _i) {
- i = *_i;
-
- if (journal_replay_ignore(i))
- continue;
+ /* Clean filesystem? */
+ if (!last_seq)
+ last_seq = cur_seq;
- last_seq = le64_to_cpu(i->j.last_seq);
- break;
- }
-
- nr = cur_seq - last_seq;
+ u64 nr = cur_seq - last_seq;
/*
* Extra fudge factor, in case we crashed when the journal pin fifo was
@@ -1532,6 +1525,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
+ u64 seq;
fifo_for_each_entry_ptr(p, &j->pin, seq)
journal_pin_list_init(p, 1);
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 83734fe4..97790703 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -453,7 +453,7 @@ int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *);
-int bch2_fs_journal_start(struct journal *, u64);
+int bch2_fs_journal_start(struct journal *, u64, u64);
void bch2_journal_set_replay_done(struct journal *);
void bch2_dev_journal_exit(struct bch_dev *);
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 0b15d71a..afbf12e8 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -160,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
struct printbuf buf = PRINTBUF;
int ret = JOURNAL_ENTRY_ADD_OK;
+ if (last_seq && c->opts.journal_rewind)
+ last_seq = min(last_seq, c->opts.journal_rewind);
+
if (!c->journal.oldest_seq_found_ondisk ||
le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk)
c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq);
@@ -1430,11 +1433,21 @@ int bch2_journal_read(struct bch_fs *c,
printbuf_reset(&buf);
prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
*last_seq, *blacklist_seq - 1);
+
+ /*
+ * Drop blacklisted entries and entries older than last_seq (or start of
+ * journal rewind):
+ */
+ u64 drop_before = *last_seq;
+ if (c->opts.journal_rewind) {
+ drop_before = min(drop_before, c->opts.journal_rewind);
+ prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind);
+ }
+
+ *last_seq = drop_before;
if (*start_seq != *blacklist_seq)
prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
bch_info(c, "%s", buf.buf);
-
- /* Drop blacklisted entries and entries older than last_seq: */
genradix_for_each(&c->journal_entries, radix_iter, _i) {
i = *_i;
@@ -1442,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c,
continue;
seq = le64_to_cpu(i->j.seq);
- if (seq < *last_seq) {
+ if (seq < drop_before) {
journal_replay_free(c, i, false);
continue;
}
@@ -1455,7 +1468,7 @@ int bch2_journal_read(struct bch_fs *c,
}
}
- ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1);
+ ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1);
if (ret)
goto err;
diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c
index 24120037..779c22eb 100644
--- a/libbcachefs/namei.c
+++ b/libbcachefs/namei.c
@@ -175,6 +175,16 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_dir_offset = dir_offset;
}
+ if (S_ISDIR(mode)) {
+ ret = bch2_maybe_propagate_has_case_insensitive(trans,
+ (subvol_inum) {
+ new_inode->bi_subvol ?: dir.subvol,
+ new_inode->bi_inum },
+ new_inode);
+ if (ret)
+ goto err;
+ }
+
if (S_ISDIR(mode) &&
!new_inode->bi_subvol)
new_inode->bi_depth = dir_u->bi_depth + 1;
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 2a026062..b0a76bd6 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -379,6 +379,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Exit recovery immediately prior to journal replay")\
+ x(journal_rewind, u64, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_UINT(0, U64_MAX), \
+ BCH2_NO_SB_OPT, 0, \
+ NULL, "Rewind journal") \
x(recovery_passes, u64, \
OPT_FS|OPT_MOUNT, \
OPT_BITFIELD(bch2_recovery_passes), \
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 0b21fa6f..37f2cc1e 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -692,7 +692,7 @@ static bool check_version_upgrade(struct bch_fs *c)
ret = true;
}
- if (new_version > c->sb.version_incompat &&
+ if (new_version > c->sb.version_incompat_allowed &&
c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
struct printbuf buf = PRINTBUF;
@@ -757,6 +757,11 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.nochanges)
c->opts.read_only = true;
+ if (c->opts.journal_rewind) {
+ bch_info(c, "rewinding journal, fsck required");
+ c->opts.fsck = true;
+ }
+
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
@@ -965,7 +970,7 @@ use_clean:
ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
journal_seq, last_seq, blacklist_seq - 1) ?:
- bch2_fs_journal_start(&c->journal, journal_seq);
+ bch2_fs_journal_start(&c->journal, last_seq, journal_seq);
if (ret)
goto err;
@@ -1181,7 +1186,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
- ret = bch2_fs_journal_start(&c->journal, 1);
+ ret = bch2_fs_journal_start(&c->journal, 1, 1);
if (ret)
goto err;
diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h
index d06e7388..82ecee3d 100644
--- a/libbcachefs/sb-errors_format.h
+++ b/libbcachefs/sb-errors_format.h
@@ -217,7 +217,7 @@ enum bch_fsck_flags {
x(inode_str_hash_invalid, 194, 0) \
x(inode_v3_fields_start_bad, 195, 0) \
x(inode_snapshot_mismatch, 196, 0) \
- x(snapshot_key_missing_inode_snapshot, 314, 0) \
+ x(snapshot_key_missing_inode_snapshot, 314, FSCK_AUTOFIX) \
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
x(inode_unlinked_and_not_open, 281, 0) \
@@ -253,7 +253,7 @@ enum bch_fsck_flags {
x(extent_overlapping, 215, 0) \
x(key_in_missing_inode, 216, 0) \
x(key_in_wrong_inode_type, 217, 0) \
- x(extent_past_end_of_inode, 218, 0) \
+ x(extent_past_end_of_inode, 218, FSCK_AUTOFIX) \
x(dirent_empty_name, 219, 0) \
x(dirent_val_too_big, 220, 0) \
x(dirent_name_too_long, 221, 0) \
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index 363eb0c6..6245e342 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -325,9 +325,17 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
{
struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
- unsigned i;
- for (i = 0; i < sb->nr_devices; i++)
+ if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) {
+ prt_printf(out, "field ends before start of entries");
+ return;
+ }
+
+ unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / sizeof(mi->_members[0]);
+ if (nr != sb->nr_devices)
+ prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices);
+
+ for (unsigned i = 0; i < min(sb->nr_devices, nr); i++)
member_to_text(out, members_v1_get(mi, i), gi, sb, i);
}
@@ -341,9 +349,27 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
{
struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
- unsigned i;
- for (i = 0; i < sb->nr_devices; i++)
+ if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) {
+ prt_printf(out, "field ends before start of entries");
+ return;
+ }
+
+ if (!le16_to_cpu(mi->member_bytes)) {
+ prt_printf(out, "member_bytes 0");
+ return;
+ }
+
+ unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / le16_to_cpu(mi->member_bytes);
+ if (nr != sb->nr_devices)
+ prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices);
+
+ /*
+ * We call to_text() on superblock sections that haven't passed
+ * validate, so we can't trust sb->nr_devices.
+ */
+
+ for (unsigned i = 0; i < min(sb->nr_devices, nr); i++)
member_to_text(out, members_v2_get(mi, i), gi, sb, i);
}
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 23a332d7..38aeaa12 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -135,7 +135,9 @@ static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
- bool ret;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ u32 orig_id = id;
+#endif
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
@@ -147,11 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
- ret = id && id < ancestor
+ bool ret = id && id < ancestor
? test_ancestor_bitmap(t, id, ancestor)
: id == ancestor;
- EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
+ EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, orig_id, ancestor));
return ret;
}
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index a49376df..79d51aef 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -48,7 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.inum_snapshot = bi->bi_snapshot,
.type = INODE_STR_HASH(bi),
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 61ef897c..a5b97c9c 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -586,7 +586,7 @@ static void __bch2_fs_free(struct bch_fs *c)
for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
utf8_unload(c->cf_encoding);
#endif
@@ -1015,7 +1015,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
if (ret)
goto err;
-#if IS_ENABLED(CONFIG_UNICODE)
+#ifdef CONFIG_UNICODE
/* Default encoding until we can potentially have more as an option. */
c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
if (IS_ERR(c->cf_encoding)) {
@@ -1067,12 +1067,13 @@ noinline_for_stack
static void print_mount_opts(struct bch_fs *c)
{
enum bch_opt_id i;
- struct printbuf p = PRINTBUF;
- bool first = true;
+ CLASS(printbuf, p)();
+ bch2_log_msg_start(c, &p);
prt_str(&p, "starting version ");
bch2_version_to_text(&p, c->sb.version);
+ bool first = true;
for (i = 0; i < bch2_opts_nr; i++) {
const struct bch_option *opt = &bch2_opt_table[i];
u64 v = bch2_opt_get_by_id(&c->opts, i);
@@ -1089,17 +1090,24 @@ static void print_mount_opts(struct bch_fs *c)
}
if (c->sb.version_incompat_allowed != c->sb.version) {
- prt_printf(&p, "\n allowing incompatible features above ");
+ prt_printf(&p, "\nallowing incompatible features above ");
bch2_version_to_text(&p, c->sb.version_incompat_allowed);
}
if (c->opts.verbose) {
- prt_printf(&p, "\n features: ");
+ prt_printf(&p, "\nfeatures: ");
prt_bitflags(&p, bch2_sb_features, c->sb.features);
}
- bch_info(c, "%s", p.buf);
- printbuf_exit(&p);
+ if (c->sb.multi_device) {
+ prt_printf(&p, "\nwith devices");
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) {
+ prt_char(&p, ' ');
+ prt_str(&p, ca->name);
+ }
+ }
+
+ bch2_print_str(c, KERN_INFO, p.buf);
}
static bool bch2_fs_may_start(struct bch_fs *c)
@@ -1143,11 +1151,12 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
- if (IS_ENABLED(CONFIG_UNICODE))
- bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+#ifdef CONFIG_UNICODE
+ bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+#endif
if (!bch2_fs_may_start(c))
return bch_err_throw(c, insufficient_devices_to_start);
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index dc095327..9c5a9c55 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -1080,34 +1080,14 @@ TRACE_EVENT(trans_blocked_journal_reclaim,
__entry->must_wait)
);
-TRACE_EVENT(trans_restart_journal_preres_get,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- unsigned flags),
- TP_ARGS(trans, caller_ip, flags),
-
- TP_STRUCT__entry(
- __array(char, trans_fn, 32 )
- __field(unsigned long, caller_ip )
- __field(unsigned, flags )
- ),
-
- TP_fast_assign(
- strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
- __entry->caller_ip = caller_ip;
- __entry->flags = flags;
- ),
-
- TP_printk("%s %pS %x", __entry->trans_fn,
- (void *) __entry->caller_ip,
- __entry->flags)
-);
-
+#if 0
+/* todo: bring back dynamic fault injection */
DEFINE_EVENT(transaction_event, trans_restart_fault_inject,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
);
+#endif
DEFINE_EVENT(transaction_event, trans_traverse_all,
TP_PROTO(struct btree_trans *trans,
@@ -1195,19 +1175,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill,
TP_ARGS(trans, caller_ip, path)
);
-DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- struct btree_path *path),
- TP_ARGS(trans, caller_ip, path)
-);
-
-DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip),
- TP_ARGS(trans, caller_ip)
-);
-
DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
@@ -1229,13 +1196,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent,
TP_ARGS(trans, caller_ip, path)
);
-DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- struct btree_path *path),
- TP_ARGS(trans, caller_ip, path)
-);
-
DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
@@ -1294,44 +1254,6 @@ TRACE_EVENT(trans_restart_mem_realloced,
__entry->bytes)
);
-TRACE_EVENT(trans_restart_key_cache_key_realloced,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- struct btree_path *path,
- unsigned old_u64s,
- unsigned new_u64s),
- TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s),
-
- TP_STRUCT__entry(
- __array(char, trans_fn, 32 )
- __field(unsigned long, caller_ip )
- __field(enum btree_id, btree_id )
- TRACE_BPOS_entries(pos)
- __field(u32, old_u64s )
- __field(u32, new_u64s )
- ),
-
- TP_fast_assign(
- strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
- __entry->caller_ip = caller_ip;
-
- __entry->btree_id = path->btree_id;
- TRACE_BPOS_assign(pos, path->pos);
- __entry->old_u64s = old_u64s;
- __entry->new_u64s = new_u64s;
- ),
-
- TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u",
- __entry->trans_fn,
- (void *) __entry->caller_ip,
- bch2_btree_id_str(__entry->btree_id),
- __entry->pos_inode,
- __entry->pos_offset,
- __entry->pos_snapshot,
- __entry->old_u64s,
- __entry->new_u64s)
-);
-
DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
@@ -1490,6 +1412,31 @@ DEFINE_EVENT(fs_str, io_move_evacuate_bucket,
TP_ARGS(c, str)
);
+DEFINE_EVENT(fs_str, extent_trim_atomic,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, btree_iter_peek_slot,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, __btree_iter_peek,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, btree_iter_peek_max,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, btree_iter_peek_prev_min,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
#ifdef CONFIG_BCACHEFS_PATH_TRACEPOINTS
TRACE_EVENT(update_by_path,
@@ -1902,21 +1849,6 @@ TRACE_EVENT(btree_path_free,
__entry->dup_locked)
);
-TRACE_EVENT(btree_path_free_trans_begin,
- TP_PROTO(btree_path_idx_t path),
- TP_ARGS(path),
-
- TP_STRUCT__entry(
- __field(btree_path_idx_t, idx )
- ),
-
- TP_fast_assign(
- __entry->idx = path;
- ),
-
- TP_printk(" path %3u", __entry->idx)
-);
-
#else /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */
#ifndef _TRACE_BCACHEFS_H
@@ -1934,7 +1866,6 @@ static inline void trace_btree_path_traverse_start(struct btree_trans *trans, st
static inline void trace_btree_path_traverse_end(struct btree_trans *trans, struct btree_path *path) {}
static inline void trace_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos *new_pos) {}
static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_idx_t path, struct btree_path *dup) {}
-static inline void trace_btree_path_free_trans_begin(btree_path_idx_t path) {}
#endif
#endif /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */