summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2024-10-09 21:58:03 -0400
committer Kent Overstreet <kent.overstreet@linux.dev> 2024-10-09 21:58:03 -0400
commit 01413354a54de46edbbd8e53b3767459f8979626 (patch)
tree 1fc10751633524c39a6623339e571c9407afb76f
parent 1d68a6da59573436f139b93f17febb426578cd81 (diff)
Update bcachefs sources to 8aa83b2beeb3 bcachefs: Improve check_snapshot_exists()
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- libbcachefs/acl.c 11
-rw-r--r-- libbcachefs/bcachefs.h 9
-rw-r--r-- libbcachefs/bcachefs_format.h 13
-rw-r--r-- libbcachefs/btree_iter.c 35
-rw-r--r-- libbcachefs/btree_iter.h 49
-rw-r--r-- libbcachefs/btree_trans_commit.c 41
-rw-r--r-- libbcachefs/btree_types.h 3
-rw-r--r-- libbcachefs/btree_update.c 2
-rw-r--r-- libbcachefs/disk_accounting.h 38
-rw-r--r-- libbcachefs/ec.c 198
-rw-r--r-- libbcachefs/errcode.h 5
-rw-r--r-- libbcachefs/fs.c 4
-rw-r--r-- libbcachefs/fsck.c 30
-rw-r--r-- libbcachefs/inode.c 4
-rw-r--r-- libbcachefs/inode.h 2
-rw-r--r-- libbcachefs/journal_io.c 3
-rw-r--r-- libbcachefs/journal_reclaim.c 6
-rw-r--r-- libbcachefs/opts.c 3
-rw-r--r-- libbcachefs/rcu_pending.c 2
-rw-r--r-- libbcachefs/recovery.c 63
-rw-r--r-- libbcachefs/recovery.h 2
-rw-r--r-- libbcachefs/recovery_passes.c 99
-rw-r--r-- libbcachefs/recovery_passes.h 1
-rw-r--r-- libbcachefs/snapshot.c 66
-rw-r--r-- libbcachefs/super.c 5
-rw-r--r-- libbcachefs/xattr.c 2
-rw-r--r-- libbcachefs/xattr.h 2
28 files changed, 451 insertions, 249 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 9f6715bd..eb105111 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-3b80552e70573764bbf38b89c58749aef9dd8753
+8aa83b2beeb30185242600116e24d2e6c0c2fce5
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 87f1be9d..99487727 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -184,11 +184,6 @@ invalid:
return ERR_PTR(-EINVAL);
}
-#define acl_for_each_entry(acl, acl_e) \
- for (acl_e = acl->a_entries; \
- acl_e < acl->a_entries + acl->a_count; \
- acl_e++)
-
/*
* Convert from in-memory to filesystem representation.
*/
@@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans,
{
struct bkey_i_xattr *xattr;
bch_acl_header *acl_header;
- const struct posix_acl_entry *acl_e;
+ const struct posix_acl_entry *acl_e, *pe;
void *outptr;
unsigned nr_short = 0, nr_long = 0, acl_len, u64s;
- acl_for_each_entry(acl, acl_e) {
+ FOREACH_ACL_ENTRY(acl_e, acl, pe) {
switch (acl_e->e_tag) {
case ACL_USER:
case ACL_GROUP:
@@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans,
outptr = (void *) acl_header + sizeof(*acl_header);
- acl_for_each_entry(acl, acl_e) {
+ FOREACH_ACL_ENTRY(acl_e, acl, pe) {
bch_acl_entry *entry = outptr;
entry->e_tag = cpu_to_le16(acl_e->e_tag);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index f4151ee5..3f88715a 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -293,6 +293,8 @@ do { \
#define bch_info(c, fmt, ...) \
bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_info_ratelimited(c, fmt, ...) \
+ bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_notice(c, fmt, ...) \
bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_warn(c, fmt, ...) \
@@ -352,6 +354,12 @@ do { \
bch_info(c, fmt, ##__VA_ARGS__); \
} while (0)
+#define bch_verbose_ratelimited(c, fmt, ...) \
+do { \
+ if ((c)->opts.verbose) \
+ bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \
+} while (0)
+
#define pr_verbose_init(opts, fmt, ...) \
do { \
if (opt_get(opts, verbose)) \
@@ -1051,6 +1059,7 @@ struct bch_fs {
u64 recovery_passes_complete;
/* never rewinds version of curr_recovery_pass */
enum bch_recovery_pass recovery_pass_done;
+ spinlock_t recovery_pass_lock;
struct semaphore online_fsck_mutex;
/* DEBUG JUNK */
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 5004f6ba..c5e3824d 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -499,8 +499,6 @@ struct bch_sb_field {
#include "disk_groups_format.h"
#include "extents_format.h"
#include "ec_format.h"
-#include "dirent_format.h"
-#include "disk_groups_format.h"
#include "inode_format.h"
#include "journal_seq_blacklist_format.h"
#include "logged_ops_format.h"
@@ -1221,6 +1219,15 @@ struct jset_entry_log {
u8 d[];
} __packed __aligned(8);
+static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l)
+{
+ unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d);
+
+ while (b && !l->d[b - 1])
+ --b;
+ return b;
+}
+
struct jset_entry_datetime {
struct jset_entry entry;
__le64 seconds;
@@ -1361,6 +1368,8 @@ static inline bool btree_id_is_alloc(enum btree_id id)
case BTREE_ID_need_discard:
case BTREE_ID_freespace:
case BTREE_ID_bucket_gens:
+ case BTREE_ID_lru:
+ case BTREE_ID_accounting:
return true;
default:
return false;
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 0883cf6e..eb707bb5 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -748,8 +748,6 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
ret = btree_node_lock(trans, path, &b->c,
path->level, lock_type, trace_ip);
if (unlikely(ret)) {
- if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
- continue;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;
BUG();
@@ -2293,6 +2291,12 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
bch2_btree_iter_verify_entry_exit(iter);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
while (1) {
k = __bch2_btree_iter_peek(iter, search_key);
if (unlikely(!k.k))
@@ -2462,6 +2466,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
if (iter->flags & BTREE_ITER_with_journal)
return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
@@ -2599,6 +2609,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
/* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_is_extents) &&
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
@@ -2944,6 +2960,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (ret)
+ return ERR_PTR(ret);
+
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
@@ -3001,7 +3021,8 @@ out_new_mem:
if (old_bytes) {
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
+ return ERR_PTR(btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
out_change_top:
p = trans->mem + trans->mem_top;
@@ -3109,6 +3130,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->last_begin_ip = _RET_IP_;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ if (trans->restarted) {
+ trans->restart_count_this_trans++;
+ } else {
+ trans->restart_count_this_trans = 0;
+ }
+#endif
+
trans_set_locked(trans);
if (trans->restarted) {
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 31a58bf4..cbcd64d1 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -23,6 +23,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path
{
unsigned idx = path - trans->paths;
+ EBUG_ON(idx >= trans->nr_paths);
EBUG_ON(!test_bit(idx, trans->paths_allocated));
if (unlikely(path->ref == U8_MAX)) {
bch2_dump_trans_paths_updates(trans);
@@ -36,6 +37,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path
static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
{
+ EBUG_ON(path - trans->paths >= trans->nr_paths);
EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated));
EBUG_ON(!path->ref);
EBUG_ON(!path->intent_ref && intent);
@@ -341,21 +343,32 @@ static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans)
}
__always_inline
-static int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
+static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
{
BUG_ON(err <= 0);
BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));
trans->restarted = err;
- trans->last_restarted_ip = _THIS_IP_;
+ trans->last_restarted_ip = ip;
return -err;
}
__always_inline
static int btree_trans_restart(struct btree_trans *trans, int err)
{
- btree_trans_restart_nounlock(trans, err);
- return -err;
+ return btree_trans_restart_ip(trans, err, _THIS_IP_);
+}
+
+/*
+ * Fault injection hook: pseudo-randomly force a transaction restart.
+ *
+ * Only active when CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS is set;
+ * otherwise this always returns 0.
+ *
+ * A restart is injected when the low min(63, 10 + restart_count_this_trans)
+ * bits of ktime_get_ns() are all zero, so each additional restart of the
+ * same transaction widens the mask and makes a further injection less likely.
+ *
+ * @ip is recorded as the restart location (tracepoint and last_restarted_ip).
+ *
+ * Returns 0, or the negative transaction_restart_fault_inject error code
+ * produced by btree_trans_restart_ip().
+ */
+static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
+{
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ /*
+ * The mask must be built from a 64 bit constant: the shift count can
+ * reach 63, which is undefined behaviour for ~0UL when unsigned long
+ * is 32 bits wide, and the value being masked is a 64 bit timestamp.
+ */
+ if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
+ trace_and_count(trans->c, trans_restart_injected, trans, ip);
+ return btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_fault_inject, ip);
+ }
+#endif
+ return 0;
}
bool bch2_btree_node_upgrade(struct btree_trans *,
@@ -593,13 +606,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \
_btree_id, _pos, _flags, KEY_TYPE_##_type))
+static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k)
+{
+ unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k));
+ memcpy(dst_v, src_k.v, b);
+ if (unlikely(b < dst_size))
+ memset(dst_v + b, 0, dst_size - b);
+}
+
#define bkey_val_copy(_dst_v, _src_k) \
do { \
- unsigned b = min_t(unsigned, sizeof(*_dst_v), \
- bkey_val_bytes(_src_k.k)); \
- memcpy(_dst_v, _src_k.v, b); \
- if (b < sizeof(*_dst_v)) \
- memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \
+ BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \
+ __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \
} while (0)
static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
@@ -608,17 +626,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
unsigned val_size, void *val)
{
struct btree_iter iter;
- struct bkey_s_c k;
- int ret;
-
- k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
- ret = bkey_err(k);
+ struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
+ int ret = bkey_err(k);
if (!ret) {
- unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size);
-
- memcpy(val, k.v, b);
- if (unlikely(b < sizeof(*val)))
- memset((void *) val + b, 0, sizeof(*val) - b);
+ __bkey_val_copy(val, val_size, k);
bch2_trans_iter_exit(trans, &iter);
}
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 9bf471fa..766c56cf 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -609,14 +609,6 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
return 0;
}
-static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
-{
- return (struct bversion) {
- .hi = res->seq >> 32,
- .lo = (res->seq << 32) | (res->offset + offset),
- };
-}
-
static inline int
bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry **stopped_at,
@@ -632,7 +624,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
if (race_fault()) {
trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
- return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
}
/*
@@ -701,25 +693,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct jset_entry *entry = trans->journal_entries;
percpu_down_read(&c->mark_lock);
-
for (entry = trans->journal_entries;
entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
entry = vstruct_next(entry))
if (entry->type == BCH_JSET_ENTRY_write_buffer_keys &&
entry->start->k.type == KEY_TYPE_accounting) {
- BUG_ON(!trans->journal_res.ref);
-
- struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
-
- a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
- (u64 *) entry - (u64 *) trans->journal_entries);
- BUG_ON(bversion_zero(a->k.bversion));
-
- if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
- ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal);
- if (ret)
- goto revert_fs_usage;
- }
+ ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags);
+ if (ret)
+ goto revert_fs_usage;
}
percpu_up_read(&c->mark_lock);
@@ -833,13 +814,9 @@ revert_fs_usage:
entry2 != entry;
entry2 = vstruct_next(entry2))
if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys &&
- entry2->start->k.type == KEY_TYPE_accounting) {
- struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);
-
- bch2_accounting_neg(a);
- bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
- bch2_accounting_neg(a);
- }
+ entry2->start->k.type == KEY_TYPE_accounting)
+ bch2_accounting_trans_commit_revert(trans,
+ bkey_i_to_accounting(entry2->start), flags);
percpu_up_read(&c->mark_lock);
return ret;
}
@@ -1050,6 +1027,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
bch2_trans_verify_not_unlocked(trans);
bch2_trans_verify_not_in_restart(trans);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret))
+ goto out_reset;
+
if (!trans->nr_updates &&
!trans->journal_entries_u64s)
goto out_reset;
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 4568a41f..ad911bb0 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -509,6 +509,9 @@ struct btree_trans {
bool notrace_relock_fail:1;
enum bch_errcode restarted:16;
u32 restart_count;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ u32 restart_count_this_trans;
+#endif
u64 last_begin_time;
unsigned long last_begin_ip;
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index 514df618..39fc7778 100644
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -144,7 +144,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
!(ret = bkey_err(old_k)) &&
bkey_eq(old_pos, old_k.k->p)) {
struct bpos whiteout_pos =
- SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);;
+ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);
if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
diff --git a/libbcachefs/disk_accounting.h b/libbcachefs/disk_accounting.h
index 4ea6c8a0..6639535d 100644
--- a/libbcachefs/disk_accounting.h
+++ b/libbcachefs/disk_accounting.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_DISK_ACCOUNTING_H
#define _BCACHEFS_DISK_ACCOUNTING_H
+#include "btree_update.h"
#include "eytzinger.h"
#include "sb-members.h"
@@ -204,6 +205,43 @@ static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
}
+static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
+{
+ EBUG_ON(!res->ref);
+
+ return (struct bversion) {
+ .hi = res->seq >> 32,
+ .lo = (res->seq << 32) | (res->offset + offset),
+ };
+}
+
+static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans,
+ struct bkey_i_accounting *a,
+ unsigned commit_flags)
+{
+ a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
+ (u64 *) a - (u64 *) trans->journal_entries);
+
+ EBUG_ON(bversion_zero(a->k.bversion));
+
+ return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))
+ ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal)
+ : 0;
+}
+
+static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans,
+ struct bkey_i_accounting *a_i,
+ unsigned commit_flags)
+{
+ if (likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
+ struct bkey_s_accounting a = accounting_i_to_s(a_i);
+
+ bch2_accounting_neg(a);
+ bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
+ bch2_accounting_neg(a);
+ }
+}
+
int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *);
int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned);
void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *);
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 1587c6e1..564841e5 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -900,7 +900,7 @@ err:
bch2_bkey_val_to_text(&msgbuf, c, orig_k);
bch_err_ratelimited(c,
"error doing reconstruct read: %s\n %s", msg, msgbuf.buf);
- printbuf_exit(&msgbuf);;
+ printbuf_exit(&msgbuf);
ret = -BCH_ERR_stripe_reconstruct;
goto out;
}
@@ -1690,7 +1690,7 @@ static void ec_stripe_key_init(struct bch_fs *c,
set_bkey_val_u64s(&s->k, u64s);
}
-static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
+static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s;
@@ -1698,7 +1698,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
- return -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
+ return NULL;
mutex_init(&s->lock);
closure_init(&s->iodone, NULL);
@@ -1713,10 +1713,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
ec_stripe_key_init(c, &s->new_stripe.key,
s->nr_data, s->nr_parity,
h->blocksize, h->disk_label);
-
- h->s = s;
- h->nr_created++;
- return 0;
+ return s;
}
static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
@@ -1857,41 +1854,42 @@ err:
return h;
}
-static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
+static int new_stripe_alloc_buckets(struct btree_trans *trans,
+ struct ec_stripe_head *h, struct ec_stripe_new *s,
enum bch_watermark watermark, struct closure *cl)
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs = h->devs;
struct open_bucket *ob;
struct open_buckets buckets;
- struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
bool have_cache = true;
int ret = 0;
- BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
- BUG_ON(v->nr_redundant != h->s->nr_parity);
+ BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity);
+ BUG_ON(v->nr_redundant != s->nr_parity);
/* * We bypass the sector allocator which normally does this: */
bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
- for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
+ for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
__clear_bit(v->ptrs[i].dev, devs.d);
- if (i < h->s->nr_data)
+ if (i < s->nr_data)
nr_have_data++;
else
nr_have_parity++;
}
- BUG_ON(nr_have_data > h->s->nr_data);
- BUG_ON(nr_have_parity > h->s->nr_parity);
+ BUG_ON(nr_have_data > s->nr_data);
+ BUG_ON(nr_have_parity > s->nr_parity);
buckets.nr = 0;
- if (nr_have_parity < h->s->nr_parity) {
+ if (nr_have_parity < s->nr_parity) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->parity_stripe,
&devs,
- h->s->nr_parity,
+ s->nr_parity,
&nr_have_parity,
&have_cache, 0,
BCH_DATA_parity,
@@ -1899,14 +1897,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
cl);
open_bucket_for_each(c, &buckets, ob, i) {
- j = find_next_zero_bit(h->s->blocks_gotten,
- h->s->nr_data + h->s->nr_parity,
- h->s->nr_data);
- BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
+ j = find_next_zero_bit(s->blocks_gotten,
+ s->nr_data + s->nr_parity,
+ s->nr_data);
+ BUG_ON(j >= s->nr_data + s->nr_parity);
- h->s->blocks[j] = buckets.v[i];
+ s->blocks[j] = buckets.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
- __set_bit(j, h->s->blocks_gotten);
+ __set_bit(j, s->blocks_gotten);
}
if (ret)
@@ -1914,11 +1912,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
}
buckets.nr = 0;
- if (nr_have_data < h->s->nr_data) {
+ if (nr_have_data < s->nr_data) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->block_stripe,
&devs,
- h->s->nr_data,
+ s->nr_data,
&nr_have_data,
&have_cache, 0,
BCH_DATA_user,
@@ -1926,13 +1924,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
cl);
open_bucket_for_each(c, &buckets, ob, i) {
- j = find_next_zero_bit(h->s->blocks_gotten,
- h->s->nr_data, 0);
- BUG_ON(j >= h->s->nr_data);
+ j = find_next_zero_bit(s->blocks_gotten,
+ s->nr_data, 0);
+ BUG_ON(j >= s->nr_data);
- h->s->blocks[j] = buckets.v[i];
+ s->blocks[j] = buckets.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
- __set_bit(j, h->s->blocks_gotten);
+ __set_bit(j, s->blocks_gotten);
}
if (ret)
@@ -1978,73 +1976,78 @@ static s64 get_existing_stripe(struct bch_fs *c,
return ret;
}
-static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
+static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s)
{
- struct bch_fs *c = trans->c;
- struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
- struct bch_stripe *existing_v;
+ struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
+ struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v;
unsigned i;
- s64 idx;
- int ret;
- /*
- * If we can't allocate a new stripe, and there's no stripes with empty
- * blocks for us to reuse, that means we have to wait on copygc:
- */
- idx = get_existing_stripe(c, h);
- if (idx < 0)
- return -BCH_ERR_stripe_alloc_blocked;
-
- ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
- bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
- "reading stripe key: %s", bch2_err_str(ret));
- if (ret) {
- bch2_stripe_close(c, h->s);
- return ret;
- }
-
- existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v;
-
- BUG_ON(existing_v->nr_redundant != h->s->nr_parity);
- h->s->nr_data = existing_v->nr_blocks -
+ BUG_ON(existing_v->nr_redundant != s->nr_parity);
+ s->nr_data = existing_v->nr_blocks -
existing_v->nr_redundant;
- ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
+ int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
if (ret) {
- bch2_stripe_close(c, h->s);
+ bch2_stripe_close(c, s);
return ret;
}
- BUG_ON(h->s->existing_stripe.size != h->blocksize);
- BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
+ BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
/*
* Free buckets we initially allocated - they might conflict with
* blocks from the stripe we're reusing:
*/
- for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) {
- bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
- h->s->blocks[i] = 0;
+ for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) {
+ bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]);
+ s->blocks[i] = 0;
}
- memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
- memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
+ memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten));
+ memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated));
- for (i = 0; i < existing_v->nr_blocks; i++) {
+ for (unsigned i = 0; i < existing_v->nr_blocks; i++) {
if (stripe_blockcount_get(existing_v, i)) {
- __set_bit(i, h->s->blocks_gotten);
- __set_bit(i, h->s->blocks_allocated);
+ __set_bit(i, s->blocks_gotten);
+ __set_bit(i, s->blocks_allocated);
}
- ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+ ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone);
}
- bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key);
- h->s->have_existing_stripe = true;
+ bkey_copy(&s->new_stripe.key, &s->existing_stripe.key);
+ s->have_existing_stripe = true;
return 0;
}
-static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h)
+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h,
+ struct ec_stripe_new *s)
+{
+ struct bch_fs *c = trans->c;
+ s64 idx;
+ int ret;
+
+ /*
+ * If we can't allocate a new stripe, and there's no stripes with empty
+ * blocks for us to reuse, that means we have to wait on copygc:
+ */
+ idx = get_existing_stripe(c, h);
+ if (idx < 0)
+ return -BCH_ERR_stripe_alloc_blocked;
+
+ ret = get_stripe_key_trans(trans, idx, &s->existing_stripe);
+ bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
+ "reading stripe key: %s", bch2_err_str(ret));
+ if (ret) {
+ bch2_stripe_close(c, s);
+ return ret;
+ }
+
+ return init_new_stripe_from_existing(c, s);
+}
+
+static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h,
+ struct ec_stripe_new *s)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -2053,15 +2056,19 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
int ret;
- if (!h->s->res.sectors) {
- ret = bch2_disk_reservation_get(c, &h->s->res,
+ if (!s->res.sectors) {
+ ret = bch2_disk_reservation_get(c, &s->res,
h->blocksize,
- h->s->nr_parity,
+ s->nr_parity,
BCH_DISK_RESERVATION_NOFAIL);
if (ret)
return ret;
}
+ /*
+ * Allocate stripe slot
+ * XXX: we're going to need a bitrange btree of free stripes
+ */
for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
BTREE_ITER_slots|BTREE_ITER_intent, k, ret) {
if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
@@ -2076,7 +2083,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
}
if (bkey_deleted(k.k) &&
- bch2_try_open_stripe(c, h->s, k.k->p.offset))
+ bch2_try_open_stripe(c, s, k.k->p.offset))
break;
}
@@ -2087,16 +2094,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
ret = ec_stripe_mem_alloc(trans, &iter);
if (ret) {
- bch2_stripe_close(c, h->s);
+ bch2_stripe_close(c, s);
goto err;
}
- h->s->new_stripe.key.k.p = iter.pos;
+ s->new_stripe.key.k.p = iter.pos;
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
err:
- bch2_disk_reservation_put(c, &h->s->res);
+ bch2_disk_reservation_put(c, &s->res);
goto out;
}
@@ -2127,22 +2134,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
return h;
if (!h->s) {
- ret = ec_new_stripe_alloc(c, h);
- if (ret) {
+ h->s = ec_new_stripe_alloc(c, h);
+ if (!h->s) {
+ ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
bch_err(c, "failed to allocate new stripe");
goto err;
}
+
+ h->nr_created++;
}
- if (h->s->allocated)
+ struct ec_stripe_new *s = h->s;
+
+ if (s->allocated)
goto allocated;
- if (h->s->have_existing_stripe)
+ if (s->have_existing_stripe)
goto alloc_existing;
/* First, try to allocate a full stripe: */
- ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?:
- __bch2_ec_stripe_head_reserve(trans, h);
+ ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?:
+ __bch2_ec_stripe_head_reserve(trans, h, s);
if (!ret)
goto allocate_buf;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
@@ -2154,15 +2166,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
* existing stripe:
*/
while (1) {
- ret = __bch2_ec_stripe_head_reuse(trans, h);
+ ret = __bch2_ec_stripe_head_reuse(trans, h, s);
if (!ret)
break;
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
goto err;
if (watermark == BCH_WATERMARK_copygc) {
- ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?:
- __bch2_ec_stripe_head_reserve(trans, h);
+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?:
+ __bch2_ec_stripe_head_reserve(trans, h, s);
if (ret)
goto err;
goto allocate_buf;
@@ -2180,19 +2192,19 @@ alloc_existing:
* Retry allocating buckets, with the watermark for this
* particular write:
*/
- ret = new_stripe_alloc_buckets(trans, h, watermark, cl);
+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl);
if (ret)
goto err;
allocate_buf:
- ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize);
+ ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize);
if (ret)
goto err;
- h->s->allocated = true;
+ s->allocated = true;
allocated:
- BUG_ON(!h->s->idx);
- BUG_ON(!h->s->new_stripe.data[0]);
+ BUG_ON(!s->idx);
+ BUG_ON(!s->new_stripe.data[0]);
BUG_ON(trans->restarted);
return h;
err:
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 64926351..26990ad5 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -162,7 +162,6 @@
x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \
x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \
x(0, backpointer_to_overwritten_btree_node) \
- x(0, lock_fail_root_changed) \
x(0, journal_reclaim_would_deadlock) \
x(EINVAL, fsck) \
x(BCH_ERR_fsck, fsck_fix) \
@@ -171,7 +170,9 @@
x(BCH_ERR_fsck, fsck_errors_not_fixed) \
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
x(BCH_ERR_fsck, fsck_repair_impossible) \
- x(0, restart_recovery) \
+ x(EINVAL, restart_recovery) \
+ x(EINVAL, not_in_recovery) \
+ x(EINVAL, cannot_rewind_recovery) \
x(0, data_update_done) \
x(EINVAL, device_state_not_allowed) \
x(EINVAL, member_info_missing) \
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 857175f4..2d3e134e 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -630,7 +630,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
goto err;
/* regular files may have hardlinks: */
- if (bch2_fs_inconsistent_on(bch2_inode_should_have_bp(&inode_u) &&
+ if (bch2_fs_inconsistent_on(bch2_inode_should_have_single_bp(&inode_u) &&
!bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)),
c,
"dirent points to inode that does not point back:\n %s",
@@ -2042,7 +2042,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE);
printbuf_nul_terminate(&buf);
- seq_puts(seq, buf.buf);
+ seq_printf(seq, ",%s", buf.buf);
int ret = buf.allocation_failure ? -ENOMEM : 0;
printbuf_exit(&buf);
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index a1087fd2..b63ed5f1 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -170,7 +170,7 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans,
if (ret)
return ret;
- struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
*target = le64_to_cpu(d.v->d_inum);
*type = d.v->d_type;
bch2_trans_iter_exit(trans, &iter);
@@ -482,6 +482,13 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
return ret;
}
+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bpos pos)
+{
+ return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
+}
+
static int remove_backpointer(struct btree_trans *trans,
struct bch_inode_unpacked *inode)
{
@@ -490,13 +497,11 @@ static int remove_backpointer(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct bkey_s_c_dirent d =
- bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
- SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0,
- dirent);
- int ret = bkey_err(d) ?:
- dirent_points_to_inode(c, d, inode) ?:
- __remove_dirent(trans, d.k->p);
+ struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter,
+ SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot));
+ int ret = bkey_err(d) ?:
+ dirent_points_to_inode(c, d, inode) ?:
+ __remove_dirent(trans, d.k->p);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -1022,13 +1027,6 @@ fsck_err:
goto out;
}
-static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bpos pos)
-{
- return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
-}
-
static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
@@ -1983,7 +1981,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
return __bch2_fsck_write_inode(trans, target);
}
- if (bch2_inode_should_have_bp(target) &&
+ if (bch2_inode_should_have_single_bp(target) &&
!fsck_err(trans, inode_wrong_backpointer,
"dirent points to inode that does not point back:\n %s",
(bch2_bkey_val_to_text(&buf, c, d.s_c),
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 2c037e84..ce6ed81c 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -533,6 +533,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
prt_printf(out, "(%x)\n", inode->bi_flags);
prt_printf(out, "journal_seq=%llu\n", inode->bi_journal_seq);
+ prt_printf(out, "hash_seed=%llx\n", inode->bi_hash_seed);
prt_printf(out, "bi_size=%llu\n", inode->bi_size);
prt_printf(out, "bi_sectors=%llu\n", inode->bi_sectors);
prt_printf(out, "bi_version=%llu\n", inode->bi_version);
@@ -1377,7 +1378,8 @@ again:
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
if (ret > 0) {
- bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
+ bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u",
+ k.k->p.offset, k.k->p.snapshot);
ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
/*
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index c8e98443..e579bd13 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -248,7 +248,7 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
int bch2_inode_nlink_inc(struct bch_inode_unpacked *);
void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
-static inline bool bch2_inode_should_have_bp(struct bch_inode_unpacked *inode)
+static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *inode)
{
bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset;
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 954f6a96..c113c99b 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -735,9 +735,8 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry *entry)
{
struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
- unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d);
- prt_printf(out, "%.*s", bytes, l->d);
+ prt_printf(out, "%.*s", jset_entry_log_msg_bytes(l), l->d);
}
static int journal_entry_overwrite_validate(struct bch_fs *c,
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index ace291f1..3d8fc264 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -758,10 +758,12 @@ static int bch2_journal_reclaim_thread(void *arg)
journal_empty = fifo_empty(&j->pin);
spin_unlock(&j->lock);
+ long timeout = j->next_reclaim - jiffies;
+
if (journal_empty)
schedule();
- else if (time_after(j->next_reclaim, jiffies))
- schedule_timeout(j->next_reclaim - jiffies);
+ else if (timeout > 0)
+ schedule_timeout(timeout);
else
break;
}
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 84097235..f853f2f3 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -522,9 +522,8 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
val = "0";
}
- /* Unknown options are ignored: */
if (id < 0)
- return 0;
+ goto bad_opt;
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt;
diff --git a/libbcachefs/rcu_pending.c b/libbcachefs/rcu_pending.c
index 40a20192..67522aa3 100644
--- a/libbcachefs/rcu_pending.c
+++ b/libbcachefs/rcu_pending.c
@@ -478,7 +478,9 @@ start_gp:
*/
if (!p->cb_armed) {
p->cb_armed = true;
+ spin_unlock_irqrestore(&p->lock, flags);
__call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
+ goto free_node;
} else {
__start_poll_synchronize_rcu(pending->srcu);
}
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 6db72d3b..1cc10528 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -34,21 +34,52 @@
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
{
- if (btree >= BTREE_ID_NR_MAX)
- return;
-
u64 b = BIT_ULL(btree);
+ int ret = 0;
+
+ mutex_lock(&c->sb_lock);
if (!(c->sb.btrees_lost_data & b)) {
bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));
-
- mutex_lock(&c->sb_lock);
bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
+
+ switch (btree) {
+ case BTREE_ID_alloc:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ goto out;
+ case BTREE_ID_backpointers:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
+ goto out;
+ case BTREE_ID_need_discard:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ goto out;
+ case BTREE_ID_freespace:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ goto out;
+ case BTREE_ID_bucket_gens:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ goto out;
+ case BTREE_ID_lru:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ goto out;
+ case BTREE_ID_accounting:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ goto out;
+ default:
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
+ goto out;
+ }
+out:
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
}
/* for -o reconstruct_alloc: */
@@ -524,22 +555,10 @@ static int read_btree_roots(struct bch_fs *c)
c, btree_root_read_error,
"error reading btree root %s l=%u: %s",
bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
- if (btree_id_is_alloc(i)) {
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+ if (btree_id_is_alloc(i))
r->error = 0;
- } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
- bch_info(c, "will run btree node scan");
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
- }
- ret = 0;
- bch2_btree_lost_data(c, i);
+ ret = bch2_btree_lost_data(c, i);
}
}
diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h
index 4bf818de..b0d55754 100644
--- a/libbcachefs/recovery.h
+++ b/libbcachefs/recovery.h
@@ -2,7 +2,7 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
-void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
+int bch2_btree_lost_data(struct bch_fs *, enum btree_id);
int bch2_journal_replay(struct bch_fs *);
diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c
index 735b8adc..e6676a1b 100644
--- a/libbcachefs/recovery_passes.c
+++ b/libbcachefs/recovery_passes.c
@@ -94,12 +94,22 @@ u64 bch2_recovery_passes_from_stable(u64 v)
/*
* For when we need to rewind recovery passes and run a pass we skipped:
*/
-int bch2_run_explicit_recovery_pass(struct bch_fs *c,
- enum bch_recovery_pass pass)
+static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ enum bch_recovery_pass pass)
{
if (c->opts.recovery_passes & BIT_ULL(pass))
return 0;
+ if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
+ return -BCH_ERR_not_in_recovery;
+
+ if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
+ c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
+ bch_info(c, "need recovery pass %s (%u), but already rw",
+ bch2_recovery_passes[pass], pass);
+ return -BCH_ERR_cannot_rewind_recovery;
+ }
+
bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
bch2_recovery_passes[pass], pass,
bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
@@ -115,6 +125,27 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
}
}
+int bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&c->recovery_pass_lock, flags);
+ int ret = __bch2_run_explicit_recovery_pass(c, pass);
+ spin_unlock_irqrestore(&c->recovery_pass_lock, flags);
+ return ret;
+}
+
+int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ lockdep_assert_held(&c->sb_lock);
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
+
+ return bch2_run_explicit_recovery_pass(c, pass);
+}
+
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
enum bch_recovery_pass pass)
{
@@ -204,7 +235,14 @@ int bch2_run_online_recovery_passes(struct bch_fs *c)
continue;
ret = bch2_run_recovery_pass(c, i);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
+
+ if (c->curr_recovery_pass < i) {
+ /*
+ * bch2_run_explicit_recovery_pass() was called: we
+ * can't always catch -BCH_ERR_restart_recovery because
+ * it may have been called from another thread (btree
+ * node read completion)
+ */
i = c->curr_recovery_pass;
continue;
}
@@ -222,30 +260,51 @@ int bch2_run_recovery_passes(struct bch_fs *c)
int ret = 0;
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+ spin_lock_irq(&c->recovery_pass_lock);
+ unsigned pass = c->curr_recovery_pass;
+
if (c->opts.recovery_pass_last &&
- c->curr_recovery_pass > c->opts.recovery_pass_last)
+ c->curr_recovery_pass > c->opts.recovery_pass_last) {
+ spin_unlock_irq(&c->recovery_pass_lock);
break;
+ }
- if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
- unsigned pass = c->curr_recovery_pass;
-
- ret = bch2_run_recovery_pass(c, c->curr_recovery_pass) ?:
- bch2_journal_flush(&c->journal);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
- (ret && c->curr_recovery_pass < pass))
- continue;
- if (ret)
- break;
-
- c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
+ if (!should_run_recovery_pass(c, pass)) {
+ c->curr_recovery_pass++;
+ c->recovery_pass_done = max(c->recovery_pass_done, pass);
+ spin_unlock_irq(&c->recovery_pass_lock);
+ continue;
+ }
+ spin_unlock_irq(&c->recovery_pass_lock);
+
+ ret = bch2_run_recovery_pass(c, pass) ?:
+ bch2_journal_flush(&c->journal);
+
+ spin_lock_irq(&c->recovery_pass_lock);
+ if (c->curr_recovery_pass < pass) {
+ /*
+ * bch2_run_explicit_recovery_pass() was called: we
+ * can't always catch -BCH_ERR_restart_recovery because
+ * it may have been called from another thread (btree
+ * node read completion)
+ */
+ spin_unlock_irq(&c->recovery_pass_lock);
+ continue;
+ } else if (c->curr_recovery_pass == pass) {
+ c->curr_recovery_pass++;
+ } else {
+ BUG();
}
+ spin_unlock_irq(&c->recovery_pass_lock);
- c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+ if (ret)
+ break;
- if (!test_bit(BCH_FS_error, &c->flags))
- bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
+ c->recovery_passes_complete |= BIT_ULL(pass);
+ c->recovery_pass_done = max(c->recovery_pass_done, pass);
- c->curr_recovery_pass++;
+ if (!test_bit(BCH_FS_error, &c->flags))
+ bch2_clear_recovery_pass_required(c, pass);
}
return ret;
diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/recovery_passes.h
index 99b464e1..7d7339c8 100644
--- a/libbcachefs/recovery_passes.h
+++ b/libbcachefs/recovery_passes.h
@@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v);
u64 bch2_fsck_recovery_passes(void);
int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
+int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_online_recovery_passes(struct bch_fs *);
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 9f4d13fa..34e01bd8 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -506,7 +506,6 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
break;
}
}
-
bch2_trans_iter_exit(trans, &iter);
if (!ret && !found) {
@@ -536,6 +535,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
struct bch_snapshot s;
struct bch_subvolume subvol;
struct printbuf buf = PRINTBUF;
+ struct btree_iter snapshot_iter = {};
u32 root_id;
int ret;
@@ -545,16 +545,27 @@ static int check_snapshot_tree(struct btree_trans *trans,
st = bkey_s_c_to_snapshot_tree(k);
root_id = le32_to_cpu(st.v->root_snapshot);
- ret = bch2_snapshot_lookup(trans, root_id, &s);
+ struct bkey_s_c_snapshot snapshot_k =
+ bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots,
+ POS(0, root_id), 0, snapshot);
+ ret = bkey_err(snapshot_k);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
+ if (!ret)
+ bkey_val_copy(&s, snapshot_k);
+
if (fsck_err_on(ret ||
root_id != bch2_snapshot_root(c, root_id) ||
st.k->p.offset != le32_to_cpu(s.tree),
trans, snapshot_tree_to_missing_snapshot,
"snapshot tree points to missing/incorrect snapshot:\n %s",
- (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+ (bch2_bkey_val_to_text(&buf, c, st.s_c),
+ prt_newline(&buf),
+ ret
+ ? prt_printf(&buf, "(%s)", bch2_err_str(ret))
+ : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c),
+ buf.buf))) {
ret = bch2_btree_delete_at(trans, iter, 0);
goto err;
}
@@ -605,6 +616,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
}
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &snapshot_iter);
printbuf_exit(&buf);
return ret;
}
@@ -905,12 +917,30 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
if (bch2_snapshot_equiv(c, id))
return 0;
- /* 0 is an invalid tree ID */
+ /* Do we need to reconstruct the snapshot_tree entry as well? */
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
u32 tree_id = 0;
- int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN,
+ 0, k, ret) {
+ if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
+ tree_id = k.k->p.offset;
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
if (ret)
return ret;
+ if (!tree_id) {
+ ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+ if (ret)
+ return ret;
+ }
+
struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
ret = PTR_ERR_OR_ZERO(snapshot);
if (ret)
@@ -921,6 +951,16 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
snapshot->v.tree = cpu_to_le32(tree_id);
snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c));
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
+ 0, k, ret) {
+ if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
+ snapshot->v.subvol = cpu_to_le32(k.k->p.offset);
+ SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true);
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
@@ -1707,18 +1747,10 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
struct bkey_s_c k;
int ret;
- bch2_trans_iter_init(trans, &iter, id, pos,
- BTREE_ITER_not_extents|
- BTREE_ITER_all_snapshots);
- while (1) {
- k = bch2_btree_iter_prev(&iter);
- ret = bkey_err(k);
- if (ret)
- break;
-
- if (!k.k)
- break;
-
+ for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos),
+ BTREE_ITER_not_extents|
+ BTREE_ITER_all_snapshots,
+ k, ret) {
if (!bkey_eq(pos, k.k->p))
break;
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 873e4be7..843431e5 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -766,6 +766,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
refcount_set(&c->ro_ref, 1);
init_waitqueue_head(&c->ro_ref_wait);
+ spin_lock_init(&c->recovery_pass_lock);
sema_init(&c->online_fsck_mutex, 1);
init_rwsem(&c->gc_lock);
@@ -1120,12 +1121,12 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
prt_bdevname(&buf, fs->bdev);
prt_char(&buf, ' ');
- bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));;
+ bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));
prt_newline(&buf);
prt_bdevname(&buf, sb->bdev);
prt_char(&buf, ' ');
- bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));;
+ bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));
prt_newline(&buf);
if (!opts->no_splitbrain_check)
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index 56c8d3fe..9986bc7f 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -609,7 +609,7 @@ static const struct xattr_handler bch_xattr_bcachefs_effective_handler = {
#endif /* NO_BCACHEFS_FS */
-const struct xattr_handler *bch2_xattr_handlers[] = {
+const struct xattr_handler * const bch2_xattr_handlers[] = {
&bch_xattr_user_handler,
&bch_xattr_trusted_handler,
&bch_xattr_security_handler,
diff --git a/libbcachefs/xattr.h b/libbcachefs/xattr.h
index c188a5ad..2c96de05 100644
--- a/libbcachefs/xattr.h
+++ b/libbcachefs/xattr.h
@@ -44,6 +44,6 @@ int bch2_xattr_set(struct btree_trans *, subvol_inum,
ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
-extern const struct xattr_handler *bch2_xattr_handlers[];
+extern const struct xattr_handler * const bch2_xattr_handlers[];
#endif /* _BCACHEFS_XATTR_H */