diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2025-06-13 23:42:59 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2025-06-13 23:42:59 -0400 |
commit | 5b9b063bb71cb1d4dfe124ca886681682a7ba416 (patch) | |
tree | b9fdd1edbadbb2ac6102d92303671cc9d6b0968b | |
parent | 792222046d80bd8715a3ed59faf9e5d5bda58417 (diff) |
Update bcachefs sources to 54de30ef9de8 bcachefs: Kill unused tracepoints
30 files changed, 411 insertions, 270 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision index e4007c01..4a0284df 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -5e40e376e242076ba1148c100993e99f339c2391 +54de30ef9de88c274997d8f78d21297d356c7bc0 diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index d0d3a686..5a1cede2 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -862,7 +862,7 @@ struct bch_fs { DARRAY(enum bcachefs_metadata_version) incompat_versions_requested; -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE struct unicode_map *cf_encoding; #endif diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index ebd2be54..e92cf392 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -442,7 +442,8 @@ again: if (!ret && !IS_ERR_OR_NULL(prev)) { BUG_ON(cur); - ret = btree_repair_node_end(trans, b, prev, pulled_from_scan); + ret = lockrestart_do(trans, + btree_repair_node_end(trans, b, prev, pulled_from_scan)); if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 6787d5b9..d8f3c4c6 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -741,16 +741,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); - if (btree_err_on(version < c->sb.version_min, + if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes && + btree_err_on(version < c->sb.version_min, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { - mutex_lock(&c->sb_lock); - c->disk_sb.sb->version_min = cpu_to_le16(version); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + if (bch2_version_compatible(version)) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = cpu_to_le16(version); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } else { + /* We have no idea what's going on: */ + i->version = cpu_to_le16(c->sb.version); + } } if (btree_err_on(BCH_VERSION_MAJOR(version) > diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index b7840337..06160399 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2076,14 +2076,14 @@ inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter static noinline void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c *k) + struct bpos search_key, struct bkey_s_c *k) { struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key; trans_for_each_update(trans, i) if (!i->key_cache_already_flushed && i->btree_id == iter->btree_id && - bpos_le(i->k->k.p, iter->pos) && + bpos_le(i->k->k.p, search_key) && bpos_ge(i->k->k.p, k->k ? k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); @@ -2092,6 +2092,7 @@ void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_ static noinline void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2100,7 +2101,7 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter trans_for_each_update(trans, i) if (!i->key_cache_already_flushed && i->btree_id == iter->btree_id && - bpos_ge(i->k->k.p, path->pos) && + bpos_ge(i->k->k.p, search_key) && bpos_le(i->k->k.p, k->k ? k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); @@ -2122,13 +2123,14 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_pos, struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); return bch2_journal_keys_peek_max(trans->c, iter->btree_id, path->level, - path->pos, + search_pos, end_pos, &iter->journal_idx); } @@ -2138,7 +2140,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos); + struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); if (k) { iter->k = k->k; @@ -2151,11 +2153,12 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, static noinline void btree_trans_peek_journal(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = - bch2_btree_journal_peek(trans, iter, + bch2_btree_journal_peek(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { iter->k = next_journal->k; @@ -2165,13 +2168,14 @@ void btree_trans_peek_journal(struct btree_trans *trans, static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id, path->level, - path->pos, + search_key, end_pos, &iter->journal_idx); } @@ -2179,11 +2183,12 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, static noinline void btree_trans_peek_prev_journal(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = - bch2_btree_journal_peek_prev(trans, iter, + bch2_btree_journal_peek_prev(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { @@ -2292,11 +2297,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } if (unlikely(iter->flags & BTREE_ITER_with_journal)) - btree_trans_peek_journal(trans, iter, &k); + btree_trans_peek_journal(trans, iter, search_key, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) - bch2_btree_trans_peek_updates(trans, iter, &k); + bch2_btree_trans_peek_updates(trans, iter, search_key, &k); if (k.k && bkey_deleted(k.k)) { /* @@ -2326,6 +2331,20 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } bch2_btree_iter_verify(trans, iter); + + if (trace___btree_iter_peek_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace___btree_iter_peek(trans->c, buf.buf); + } + return k; } @@ -2484,6 +2503,19 @@ out_no_locked: bch2_btree_iter_verify_entry_exit(iter); + if (trace_btree_iter_peek_max_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_max(trans->c, buf.buf); + } + return k; end: bch2_btree_iter_set_pos(trans, iter, end); @@ -2557,11 +2589,11 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st } if (unlikely(iter->flags & BTREE_ITER_with_journal)) - btree_trans_peek_prev_journal(trans, iter, &k); + btree_trans_peek_prev_journal(trans, iter, search_key, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) - bch2_btree_trans_peek_prev_updates(trans, iter, &k); + bch2_btree_trans_peek_prev_updates(trans, iter, search_key, &k); if (likely(k.k && !bkey_deleted(k.k))) { break; @@ -2724,6 +2756,19 @@ out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(trans, iter); + + if (trace_btree_iter_peek_prev_min_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_prev_min(trans->c, buf.buf); + } return k; end: bch2_btree_iter_set_pos(trans, iter, end); @@ -2767,8 +2812,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { - if (iter->pos.inode == KEY_INODE_MAX) - return bkey_s_c_null; + if (iter->pos.inode == KEY_INODE_MAX) { + k = bkey_s_c_null; + goto out2; + } bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); } @@ -2785,8 +2832,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } struct btree_path *path = btree_iter_path(trans, iter); - if (unlikely(!btree_path_node(path, path->level))) - return bkey_s_c_null; + if (unlikely(!btree_path_node(path, path->level))) { + k = bkey_s_c_null; + goto out2; + } btree_path_set_should_be_locked(trans, path); @@ -2879,7 +2928,20 @@ out: bch2_btree_iter_verify(trans, iter); ret = bch2_btree_iter_verify_ret(trans, iter, k); if (unlikely(ret)) - return bkey_s_c_err(ret); + k = bkey_s_c_err(ret); +out2: + if (trace_btree_iter_peek_slot_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_slot(trans->c, buf.buf); + } return k; } @@ -3159,46 +3221,32 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long mutex_unlock(&s->lock); } - if (trans->used_mempool) { - if (trans->mem_bytes >= new_bytes) - goto out_change_top; - - /* No more space from mempool item, need malloc new one */ - new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); - if (unlikely(!new_mem)) { - bch2_trans_unlock(trans); - - new_mem = kmalloc(new_bytes, GFP_KERNEL); - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) { + EBUG_ON(trans->mem_bytes >= new_bytes); + return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + } - ret = bch2_trans_relock(trans); - if (ret) { - kfree(new_mem); - return ERR_PTR(ret); - } - } - memcpy(new_mem, trans->mem, trans->mem_top); - trans->used_mempool = false; - mempool_free(trans->mem, &c->btree_trans_mem_pool); - goto out_new_mem; + if (old_bytes) { + trans->realloc_bytes_required = new_bytes; + trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } - new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); + EBUG_ON(trans->mem); + + new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_mem)) { bch2_trans_unlock(trans); - new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); + new_mem = kmalloc(new_bytes, GFP_KERNEL); if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); new_bytes = BTREE_TRANS_MEM_MAX; - memcpy(new_mem, trans->mem, trans->mem_top); trans->used_mempool = true; - kfree(trans->mem); } - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + EBUG_ON(!new_mem); trans->mem = new_mem; trans->mem_bytes = new_bytes; @@ -3207,18 +3255,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (ret) return ERR_PTR(ret); } -out_new_mem: + trans->mem = new_mem; trans->mem_bytes = new_bytes; - if (old_bytes) { - trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); - return ERR_PTR(btree_trans_restart_ip(trans, - BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); - } -out_change_top: - bch2_trans_kmalloc_trace(trans, size, ip); - p = trans->mem + trans->mem_top; trans->mem_top += size; memset(p, 0, size); @@ -3279,6 +3319,27 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->restart_count++; trans->mem_top = 0; + if (trans->restarted == BCH_ERR_transaction_restart_mem_realloced) { + EBUG_ON(!trans->mem || !trans->mem_bytes); + unsigned new_bytes = trans->realloc_bytes_required; + void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); + if (unlikely(!new_mem)) { + bch2_trans_unlock(trans); + new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); + + EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX); + + if (!new_mem) { + new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); + new_bytes = BTREE_TRANS_MEM_MAX; + trans->used_mempool = true; + kfree(trans->mem); + } + } + trans->mem = new_mem; + trans->mem_bytes = new_bytes; + } + trans_for_each_path(trans, path, i) { path->should_be_locked = false; diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c index cf739875..8f94c3bd 100644 --- a/libbcachefs/btree_journal_iter.c +++ b/libbcachefs/btree_journal_iter.c @@ -141,8 +141,8 @@ search: if (!*idx) *idx = __bch2_journal_key_search(keys, btree_id, level, pos); - while (*idx && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { + while (*idx < keys->nr && + __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) >= 0) { (*idx)++; iters++; if (iters == 10) { @@ -641,10 +641,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) { const struct journal_key *l = _l; const struct journal_key *r = _r; + int rewind = l->rewind && r->rewind ? -1 : 1; return journal_key_cmp(l, r) ?: - cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset); + ((cmp_int(l->journal_seq, r->journal_seq) ?: + cmp_int(l->journal_offset, r->journal_offset)) * rewind); } void bch2_journal_keys_put(struct bch_fs *c) @@ -712,37 +713,59 @@ int bch2_journal_keys_sort(struct bch_fs *c) struct journal_replay *i, **_i; struct journal_keys *keys = &c->journal_keys; size_t nr_read = 0; + u64 nr_entries = 0, nr_non_ignored = 0; + + u64 rewind_seq = c->opts.journal_rewind ?: U64_MAX; genradix_for_each(&c->journal_entries, iter, _i) { i = *_i; + nr_entries += i != NULL; + if (journal_replay_ignore(i)) continue; + nr_non_ignored++; + cond_resched(); - for_each_jset_key(k, entry, &i->j) { - struct journal_key n = (struct journal_key) { - .btree_id = entry->btree_id, - .level = entry->level, - .k = k, - .journal_seq = le64_to_cpu(i->j.seq), - .journal_offset = k->_data - i->j._data, - }; - - if (darray_push(keys, n)) { - __journal_keys_sort(keys); - - if (keys->nr * 8 > keys->size * 7) { - bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", - keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); - return bch_err_throw(c, ENOMEM_journal_keys_sort); + vstruct_for_each(&i->j, entry) { + bool rewind = !entry->level && + !btree_id_is_alloc(entry->btree_id) && + le64_to_cpu(i->j.seq) >= rewind_seq; + + if (entry->type != (rewind + ? BCH_JSET_ENTRY_overwrite + : BCH_JSET_ENTRY_btree_keys)) + continue; + + if (!rewind && le64_to_cpu(i->j.seq) < c->journal_replay_seq_start) + continue; + + jset_entry_for_each_key(entry, k) { + struct journal_key n = (struct journal_key) { + .btree_id = entry->btree_id, + .level = entry->level, + .rewind = rewind, + .k = k, + .journal_seq = le64_to_cpu(i->j.seq), + .journal_offset = k->_data - i->j._data, + }; + + if (darray_push(keys, n)) { + __journal_keys_sort(keys); + + if (keys->nr * 8 > keys->size * 7) { + bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", + keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); + return bch_err_throw(c, ENOMEM_journal_keys_sort); + } + + BUG_ON(darray_push(keys, n)); } - BUG_ON(darray_push(keys, n)); + nr_read++; } - - nr_read++; } } diff --git a/libbcachefs/btree_journal_iter_types.h b/libbcachefs/btree_journal_iter_types.h index 8b773823..86aacb25 100644 --- a/libbcachefs/btree_journal_iter_types.h +++ b/libbcachefs/btree_journal_iter_types.h @@ -11,8 +11,9 @@ struct journal_key { u32 journal_offset; enum btree_id btree_id:8; unsigned level:8; - bool allocated; - bool overwritten; + bool allocated:1; + bool overwritten:1; + bool rewind:1; struct journal_key_range_overwritten __rcu * overwritten_range; struct bkey_i *k; diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index 9adca77e..f2173a33 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -417,8 +417,10 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st EBUG_ON(!btree_node_locked(path, path->level)); EBUG_ON(path->uptodate); - path->should_be_locked = true; - trace_btree_path_should_be_locked(trans, path); + if (!path->should_be_locked) { + path->should_be_locked = true; + trace_btree_path_should_be_locked(trans, path); + } } static inline void __btree_path_set_level_up(struct btree_trans *trans, diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c index d9710801..639ef75b 100644 --- a/libbcachefs/btree_trans_commit.c +++ b/libbcachefs/btree_trans_commit.c @@ -595,12 +595,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, int ret = 0; bch2_trans_verify_not_unlocked_or_in_restart(trans); - +#if 0 + /* todo: bring back dynamic fault injection */ if (race_fault()) { trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); } - +#endif /* * Check if the insert will fit in the leaf node with the write lock * held, otherwise another thread could write the node changing the @@ -757,6 +758,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, btree_trans_journal_entries_start(trans), trans->journal_entries.u64s); + EBUG_ON(trans->journal_res.u64s < trans->journal_entries.u64s); + trans->journal_res.offset += trans->journal_entries.u64s; trans->journal_res.u64s -= trans->journal_entries.u64s; @@ -1003,6 +1006,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) { struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; + unsigned journal_u64s = 0; int ret = 0; bch2_trans_verify_not_unlocked_or_in_restart(trans); @@ -1031,10 +1035,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - trans->journal_u64s = trans->journal_entries.u64s + jset_u64s(trans->accounting.u64s); + journal_u64s = jset_u64s(trans->accounting.u64s); trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) - trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); + journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); trans_for_each_update(trans, i) { struct btree_path *path = trans->paths + i->path; @@ -1054,11 +1058,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) continue; /* we're going to journal the key being updated: */ - trans->journal_u64s += jset_u64s(i->k->k.u64s); + journal_u64s += jset_u64s(i->k->k.u64s); /* and we're also going to log the overwrite: */ if (trans->journal_transaction_names) - trans->journal_u64s += jset_u64s(i->old_k.u64s); + journal_u64s += jset_u64s(i->old_k.u64s); } if (trans->extra_disk_res) { @@ -1076,6 +1080,8 @@ retry: memset(&trans->journal_res, 0, sizeof(trans->journal_res)); memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); + trans->journal_u64s = journal_u64s + trans->journal_entries.u64s; + ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_); /* make sure we didn't drop or screw up locks: */ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 3aa4a602..112170fd 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -497,6 +497,7 @@ struct btree_trans { void *mem; unsigned mem_top; unsigned mem_bytes; + unsigned realloc_bytes_required; #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE darray_trans_kmalloc_trace trans_kmalloc_trace; #endif diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 9e281d18..e7758460 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -290,6 +290,8 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct bch_fs *c = trans->c; struct write_point *wp; struct btree *b; + BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; + struct open_buckets obs = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim @@ -308,8 +310,8 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct btree_alloc *a = &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - bkey_copy(&b->key, &a->k); - b->ob = a->ob; + obs = a->ob; + bkey_copy(&tmp.k, &a->k); mutex_unlock(&c->btree_reserve_cache_lock); goto out; } @@ -343,12 +345,14 @@ retry: goto retry; } - bkey_btree_ptr_v2_init(&b->key); - bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false); + bkey_btree_ptr_v2_init(&tmp.k); + bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false); - bch2_open_bucket_get(c, wp, &b->ob); + bch2_open_bucket_get(c, wp, &obs); bch2_alloc_sectors_done(c, wp); out: + bkey_copy(&b->key, &tmp.k); + b->ob = obs; six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); @@ -509,25 +513,30 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, unsigned flags, struct closure *cl) { + struct btree *b; + unsigned interior; + int ret = 0; + BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX); /* * Protects reaping from the btree node cache and using the btree node * open bucket reserve: */ - int ret = bch2_btree_cache_cannibalize_lock(trans, cl); + ret = bch2_btree_cache_cannibalize_lock(trans, cl); if (ret) return ret; - for (unsigned interior = 0; interior < 2; interior++) { + for (interior = 0; interior < 2; interior++) { struct prealloc_nodes *p = as->prealloc_nodes + interior; while (p->nr < nr_nodes[interior]) { - struct btree *b = __bch2_btree_node_alloc(trans, &as->disk_res, - cl, interior, target, flags); - ret = PTR_ERR_OR_ZERO(b); - if (ret) + b = __bch2_btree_node_alloc(trans, &as->disk_res, cl, + interior, target, flags); + if (IS_ERR(b)) { + ret = PTR_ERR(b); goto err; + } p->b[p->nr++] = b; } diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c index 90b21e61..21b5c03d 100644 --- a/libbcachefs/btree_write_buffer.c +++ b/libbcachefs/btree_write_buffer.c @@ -676,6 +676,9 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, goto err; bch2_bkey_buf_copy(last_flushed, c, tmp.k); + + /* can we avoid the unconditional restart? */ + trace_and_count(c, trans_restart_write_buffer_flush, trans, _RET_IP_); ret = bch_err_throw(c, transaction_restart_write_buffer_flush); } err: diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 5c3df997..300f7cc8 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -18,7 +18,7 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1); int ret = PTR_ERR_OR_ZERO(buf); if (ret) diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index ac326413..86a842f1 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -137,7 +137,6 @@ x(BCH_ERR_transaction_restart, transaction_restart_relock) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \ - x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \ x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \ x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \ x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \ @@ -148,11 +147,8 @@ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\ x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\ x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \ - x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \ - x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\ - x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \ x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ x(BCH_ERR_transaction_restart, transaction_restart_nested) \ @@ -241,7 +237,6 @@ x(BCH_ERR_journal_res_blocked, journal_buf_enomem) \ x(BCH_ERR_journal_res_blocked, journal_stuck) \ x(BCH_ERR_journal_res_blocked, journal_retry_open) \ - x(BCH_ERR_journal_res_blocked, journal_preres_get_blocked) \ x(BCH_ERR_journal_res_blocked, bucket_alloc_blocked) \ x(BCH_ERR_journal_res_blocked, stripe_alloc_blocked) \ x(BCH_ERR_invalid, invalid_sb) \ diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index b899ee75..e76e58a5 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -139,6 +139,17 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, if (ret) return ret; - bch2_cut_back(end, k); + /* tracepoint */ + + if (bpos_lt(end, k->k.p)) { + if (trace_extent_trim_atomic_enabled()) { + CLASS(printbuf, buf)(); + bch2_bpos_to_text(&buf, end); + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); + trace_extent_trim_atomic(trans->c, buf.buf); + } + bch2_cut_back(end, k); + } return 0; } diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 40b633cd..3063a8dd 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -722,7 +722,7 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, if (IS_ERR(inode)) inode = NULL; -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE if (!inode && IS_CASEFOLDED(vdir)) { /* * Do not cache a negative dentry in casefolded directories @@ -2564,7 +2564,7 @@ got_sb: sb->s_shrink->seeks = 0; -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE sb->s_encoding = c->cf_encoding; #endif generic_set_sb_d_ops(sb); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 7736103f..48810a8e 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -806,7 +806,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, if (!n->whiteout) { return bch2_inode_unpack(inode, &n->inode); } else { - n->inode.bi_inum = inode.k->p.inode; + n->inode.bi_inum = inode.k->p.offset; n->inode.bi_snapshot = inode.k->p.snapshot; return 0; } @@ -903,17 +903,15 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct bch_inode_unpacked new = i->inode; - struct bkey_i whiteout; - - new.bi_snapshot = k.k->p.snapshot; - if (!i->whiteout) { + struct bch_inode_unpacked new = i->inode; + new.bi_snapshot = k.k->p.snapshot; ret = __bch2_fsck_write_inode(trans, &new); } else { + struct bkey_i whiteout; bkey_init(&whiteout.k); whiteout.k.type = KEY_TYPE_whiteout; - whiteout.k.p = SPOS(0, i->inode.bi_inum, i->inode.bi_snapshot); + whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot); ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, &whiteout, BTREE_UPDATE_internal_snapshot_node); @@ -1822,18 +1820,39 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) continue; - if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && + u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9; + + if (fsck_err_on(k.k->p.offset > last_block && !bkey_extent_is_reservation(k), trans, extent_past_end_of_inode, "extent type past end of inode %llu:%u, i_size %llu\n%s", i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct btree_iter iter2; + struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout)); + ret = PTR_ERR_OR_ZERO(whiteout); + if (ret) + goto err; + + bkey_init(&whiteout->k); + whiteout->k.p = SPOS(k.k->p.inode, + last_block, + i->inode.bi_snapshot); + bch2_key_resize(&whiteout->k, + min(KEY_SIZE_MAX & (~0 << c->block_bits), + U64_MAX - whiteout->k.p.offset)); + - bch2_trans_copy_iter(trans, &iter2, iter); - bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot); + /* + * Need a normal (not BTREE_ITER_all_snapshots) + * iterator, if we're deleting in a different + * snapshot and need to emit a whiteout + */ + struct btree_iter iter2; + bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents, + bkey_start_pos(&whiteout->k), + BTREE_ITER_intent); ret = bch2_btree_iter_traverse(trans, &iter2) ?: - bch2_btree_delete_at(trans, &iter2, + bch2_trans_update(trans, &iter2, whiteout, BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &iter2); if (ret) @@ -2184,7 +2203,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &i->inode); dir->first_this_inode = false; -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL; #endif @@ -2474,6 +2493,14 @@ int bch2_check_root(struct bch_fs *c) return ret; } +static bool darray_u32_has(darray_u32 *d, u32 v) +{ + darray_for_each(*d, i) + if (*i == v) + return true; + return false; +} + static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; @@ -2501,7 +2528,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, u32 parent = le32_to_cpu(s.v->fs_path_parent); - if (darray_find(subvol_path, parent)) { + if (darray_u32_has(&subvol_path, parent)) { printbuf_reset(&buf); prt_printf(&buf, "subvolume loop:\n"); diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index ceed2eb7..53e5dc1f 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -1265,7 +1265,7 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, { struct bch_fs *c = trans->c; -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE int ret = 0; /* Not supported on individual files. */ if (!S_ISDIR(bi->bi_mode)) diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index dda802a6..df71af00 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1474,14 +1474,13 @@ void bch2_fs_journal_stop(struct journal *j) clear_bit(JOURNAL_running, &j->flags); } -int bch2_fs_journal_start(struct journal *j, u64 cur_seq) +int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_entry_pin_list *p; struct journal_replay *i, **_i; struct genradix_iter iter; bool had_entries = false; - u64 last_seq = cur_seq, nr, seq; /* * @@ -1495,17 +1494,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) return -EINVAL; } - genradix_for_each_reverse(&c->journal_entries, iter, _i) { - i = *_i; - - if (journal_replay_ignore(i)) - continue; + /* Clean filesystem? */ + if (!last_seq) + last_seq = cur_seq; - last_seq = le64_to_cpu(i->j.last_seq); - break; - } - - nr = cur_seq - last_seq; + u64 nr = cur_seq - last_seq; /* * Extra fudge factor, in case we crashed when the journal pin fifo was @@ -1532,6 +1525,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->pin.back = cur_seq; atomic64_set(&j->seq, cur_seq - 1); + u64 seq; fifo_for_each_entry_ptr(p, &j->pin, seq) journal_pin_list_init(p, 1); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 83734fe4..97790703 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -453,7 +453,7 @@ int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); -int bch2_fs_journal_start(struct journal *, u64); +int bch2_fs_journal_start(struct journal *, u64, u64); void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 0b15d71a..afbf12e8 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -160,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, struct printbuf buf = PRINTBUF; int ret = JOURNAL_ENTRY_ADD_OK; + if (last_seq && c->opts.journal_rewind) + last_seq = min(last_seq, c->opts.journal_rewind); + if (!c->journal.oldest_seq_found_ondisk || le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk) c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq); @@ -1430,11 +1433,21 @@ int bch2_journal_read(struct bch_fs *c, printbuf_reset(&buf); prt_printf(&buf, "journal read done, replaying entries %llu-%llu", *last_seq, *blacklist_seq - 1); + + /* + * Drop blacklisted entries and entries older than last_seq (or start of + * journal rewind: + */ + u64 drop_before = *last_seq; + if (c->opts.journal_rewind) { + drop_before = min(drop_before, c->opts.journal_rewind); + prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + } + + *last_seq = drop_before; if (*start_seq != *blacklist_seq) prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); bch_info(c, "%s", buf.buf); - - /* Drop blacklisted entries and entries older than last_seq: */ genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; @@ -1442,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c, continue; seq = le64_to_cpu(i->j.seq); - if (seq < *last_seq) { + if (seq < drop_before) { journal_replay_free(c, i, false); continue; } @@ -1455,7 +1468,7 @@ int bch2_journal_read(struct bch_fs *c, } } - ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1); + ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); if (ret) goto err; diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c index 24120037..779c22eb 100644 --- a/libbcachefs/namei.c +++ b/libbcachefs/namei.c @@ -175,6 +175,16 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_dir_offset = dir_offset; } + if (S_ISDIR(mode)) { + ret = bch2_maybe_propagate_has_case_insensitive(trans, + (subvol_inum) { + new_inode->bi_subvol ?: dir.subvol, + new_inode->bi_inum }, + new_inode); + if (ret) + goto err; + } + if (S_ISDIR(mode) && !new_inode->bi_subvol) new_inode->bi_depth = dir_u->bi_depth + 1; diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 2a026062..b0a76bd6 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -379,6 +379,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Exit recovery immediately prior to journal replay")\ + x(journal_rewind, u64, \ + OPT_FS|OPT_MOUNT, \ + OPT_UINT(0, U64_MAX), \ + BCH2_NO_SB_OPT, 0, \ + NULL, "Rewind journal") \ x(recovery_passes, u64, \ OPT_FS|OPT_MOUNT, \ OPT_BITFIELD(bch2_recovery_passes), \ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 0b21fa6f..37f2cc1e 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -692,7 +692,7 @@ static bool check_version_upgrade(struct bch_fs *c) ret = true; } - if (new_version > c->sb.version_incompat && + if (new_version > c->sb.version_incompat_allowed && c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { struct printbuf buf = PRINTBUF; @@ -757,6 +757,11 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.nochanges) c->opts.read_only = true; + if (c->opts.journal_rewind) { + bch_info(c, "rewinding journal, fsck required"); + c->opts.fsck = true; + } + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; @@ -965,7 +970,7 @@ use_clean: ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", journal_seq, last_seq, blacklist_seq - 1) ?: - bch2_fs_journal_start(&c->journal, journal_seq); + bch2_fs_journal_start(&c->journal, last_seq, journal_seq); if (ret) goto err; @@ -1181,7 +1186,7 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - ret = bch2_fs_journal_start(&c->journal, 1); + ret = bch2_fs_journal_start(&c->journal, 1, 1); if (ret) goto err; diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h index d06e7388..82ecee3d 100644 --- a/libbcachefs/sb-errors_format.h +++ b/libbcachefs/sb-errors_format.h @@ -217,7 +217,7 @@ enum bch_fsck_flags { x(inode_str_hash_invalid, 194, 0) \ x(inode_v3_fields_start_bad, 195, 0) \ x(inode_snapshot_mismatch, 196, 0) \ - x(snapshot_key_missing_inode_snapshot, 314, 0) \ + x(snapshot_key_missing_inode_snapshot, 314, FSCK_AUTOFIX) \ x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ @@ -253,7 +253,7 @@ enum bch_fsck_flags { x(extent_overlapping, 215, 0) \ x(key_in_missing_inode, 216, 0) \ x(key_in_wrong_inode_type, 217, 0) \ - x(extent_past_end_of_inode, 218, 0) \ + x(extent_past_end_of_inode, 218, FSCK_AUTOFIX) \ x(dirent_empty_name, 219, 0) \ x(dirent_val_too_big, 220, 0) \ x(dirent_name_too_long, 221, 0) \ diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index 363eb0c6..6245e342 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -325,9 +325,17 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / sizeof(mi->_members[0]); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v1_get(mi, i), gi, sb, i); } @@ -341,9 +349,27 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + if (!le16_to_cpu(mi->member_bytes)) { + prt_printf(out, "member_bytes 0"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / le16_to_cpu(mi->member_bytes); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + /* + * We call to_text() on superblock sections that haven't passed + * validate, so we can't trust sb->nr_devices. + */ + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v2_get(mi, i), gi, sb, i); } diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index 23a332d7..38aeaa12 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -135,7 +135,9 @@ static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { - bool ret; +#ifdef CONFIG_BCACHEFS_DEBUG + u32 orig_id = id; +#endif guard(rcu)(); struct snapshot_table *t = rcu_dereference(c->snapshots); @@ -147,11 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) while (id && id < ancestor - IS_ANCESTOR_BITMAP) id = get_ancestor_below(t, id, ancestor); - ret = id && id < ancestor + bool ret = id && id < ancestor ? test_ancestor_bitmap(t, id, ancestor) : id == ancestor; - EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); + EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, orig_id, ancestor)); return ret; } diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index a49376df..79d51aef 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -48,7 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) struct bch_hash_info info = { .inum_snapshot = bi->bi_snapshot, .type = INODE_STR_HASH(bi), -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, #endif .siphash_key = { .k0 = bi->bi_hash_seed } diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 61ef897c..a5b97c9c 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -586,7 +586,7 @@ static void __bch2_fs_free(struct bch_fs *c) for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE utf8_unload(c->cf_encoding); #endif @@ -1015,7 +1015,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, if (ret) goto err; -#if IS_ENABLED(CONFIG_UNICODE) +#ifdef CONFIG_UNICODE /* Default encoding until we can potentially have more as an option. */ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); if (IS_ERR(c->cf_encoding)) { @@ -1067,12 +1067,13 @@ noinline_for_stack static void print_mount_opts(struct bch_fs *c) { enum bch_opt_id i; - struct printbuf p = PRINTBUF; - bool first = true; + CLASS(printbuf, p)(); + bch2_log_msg_start(c, &p); prt_str(&p, "starting version "); bch2_version_to_text(&p, c->sb.version); + bool first = true; for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); @@ -1089,17 +1090,24 @@ static void print_mount_opts(struct bch_fs *c) } if (c->sb.version_incompat_allowed != c->sb.version) { - prt_printf(&p, "\n allowing incompatible features above "); + prt_printf(&p, "\nallowing incompatible features above "); bch2_version_to_text(&p, c->sb.version_incompat_allowed); } if (c->opts.verbose) { - prt_printf(&p, "\n features: "); + prt_printf(&p, "\nfeatures: "); prt_bitflags(&p, bch2_sb_features, c->sb.features); } - bch_info(c, "%s", p.buf); - printbuf_exit(&p); + if (c->sb.multi_device) { + prt_printf(&p, "\nwith devices"); + for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) { + prt_char(&p, ' '); + prt_str(&p, ca->name); + } + } + + bch2_print_str(c, KERN_INFO, p.buf); } static bool bch2_fs_may_start(struct bch_fs *c) @@ -1143,11 +1151,12 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); - if (IS_ENABLED(CONFIG_UNICODE)) - bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#ifdef CONFIG_UNICODE + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#endif if (!bch2_fs_may_start(c)) return bch_err_throw(c, insufficient_devices_to_start); diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h index dc095327..9c5a9c55 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/trace.h @@ -1080,34 +1080,14 @@ TRACE_EVENT(trans_blocked_journal_reclaim, __entry->must_wait) ); -TRACE_EVENT(trans_restart_journal_preres_get, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - unsigned flags), - TP_ARGS(trans, caller_ip, flags), - - TP_STRUCT__entry( - __array(char, trans_fn, 32 ) - __field(unsigned long, caller_ip ) - __field(unsigned, flags ) - ), - - TP_fast_assign( - strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); - __entry->caller_ip = caller_ip; - __entry->flags = flags; - ), - - TP_printk("%s %pS %x", __entry->trans_fn, - (void *) __entry->caller_ip, - __entry->flags) -); - +#if 0 +/* todo: bring back dynamic fault injection */ DEFINE_EVENT(transaction_event, trans_restart_fault_inject, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), TP_ARGS(trans, caller_ip) ); +#endif DEFINE_EVENT(transaction_event, trans_traverse_all, TP_PROTO(struct btree_trans *trans, @@ -1195,19 +1175,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) -); - -DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) -); - DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1229,13 +1196,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) -); - DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1294,44 +1254,6 @@ TRACE_EVENT(trans_restart_mem_realloced, __entry->bytes) ); -TRACE_EVENT(trans_restart_key_cache_key_realloced, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path, - unsigned old_u64s, - unsigned new_u64s), - TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s), - - TP_STRUCT__entry( - __array(char, trans_fn, 32 ) - __field(unsigned long, caller_ip ) - __field(enum btree_id, btree_id ) - TRACE_BPOS_entries(pos) - __field(u32, old_u64s ) - __field(u32, new_u64s ) - ), - - TP_fast_assign( - strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); - __entry->caller_ip = caller_ip; - - __entry->btree_id = path->btree_id; - TRACE_BPOS_assign(pos, path->pos); - __entry->old_u64s = old_u64s; - __entry->new_u64s = new_u64s; - ), - - TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", - __entry->trans_fn, - (void *) __entry->caller_ip, - bch2_btree_id_str(__entry->btree_id), - __entry->pos_inode, - __entry->pos_offset, - __entry->pos_snapshot, - __entry->old_u64s, - __entry->new_u64s) -); - DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), @@ -1490,6 +1412,31 @@ DEFINE_EVENT(fs_str, io_move_evacuate_bucket, TP_ARGS(c, str) ); +DEFINE_EVENT(fs_str, extent_trim_atomic, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_slot, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, __btree_iter_peek, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_max, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_prev_min, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + #ifdef CONFIG_BCACHEFS_PATH_TRACEPOINTS TRACE_EVENT(update_by_path, @@ -1902,21 +1849,6 @@ TRACE_EVENT(btree_path_free, __entry->dup_locked) ); -TRACE_EVENT(btree_path_free_trans_begin, - TP_PROTO(btree_path_idx_t path), - TP_ARGS(path), - - TP_STRUCT__entry( - __field(btree_path_idx_t, idx ) - ), - - TP_fast_assign( - __entry->idx = path; - ), - - TP_printk(" path %3u", __entry->idx) -); - #else /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ #ifndef _TRACE_BCACHEFS_H @@ -1934,7 +1866,6 @@ static inline void trace_btree_path_traverse_start(struct btree_trans *trans, st static inline void trace_btree_path_traverse_end(struct btree_trans *trans, struct btree_path *path) {} static inline void trace_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos *new_pos) {} static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_idx_t path, struct btree_path *dup) {} -static inline void trace_btree_path_free_trans_begin(btree_path_idx_t path) {} #endif #endif /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ |