summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2025-04-28 12:50:37 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2025-04-28 12:50:37 -0400
commit0589d9f3c3cdff28a13f9efc96762d39ef615d22 (patch)
tree4d7e466a10ba9cd10c8b30e7aad1606e44770314
parente00e83b84d8de5a6fa279a3c50ad1d7603ee4607 (diff)
Update bcachefs sources to 6e73711dc355 bcachefs: Read retries are after checksum errors now REQ_FUA
-rw-r--r--.bcachefs_revision2
-rw-r--r--libbcachefs/alloc_background.c10
-rw-r--r--libbcachefs/alloc_foreground.c2
-rw-r--r--libbcachefs/alloc_foreground.h4
-rw-r--r--libbcachefs/bcachefs.h11
-rw-r--r--libbcachefs/btree_gc.c12
-rw-r--r--libbcachefs/btree_io.c270
-rw-r--r--libbcachefs/btree_io.h4
-rw-r--r--libbcachefs/btree_iter.c9
-rw-r--r--libbcachefs/btree_update.c7
-rw-r--r--libbcachefs/btree_update.h12
-rw-r--r--libbcachefs/buckets.c19
-rw-r--r--libbcachefs/debug.c4
-rw-r--r--libbcachefs/dirent.c29
-rw-r--r--libbcachefs/dirent.h15
-rw-r--r--libbcachefs/errcode.h1
-rw-r--r--libbcachefs/error.c44
-rw-r--r--libbcachefs/error.h4
-rw-r--r--libbcachefs/extents.c61
-rw-r--r--libbcachefs/extents.h3
-rw-r--r--libbcachefs/extents_types.h1
-rw-r--r--libbcachefs/fs.c304
-rw-r--r--libbcachefs/inode.h8
-rw-r--r--libbcachefs/io_read.c83
-rw-r--r--libbcachefs/journal_reclaim.c26
-rw-r--r--libbcachefs/move.c5
-rw-r--r--libbcachefs/namei.c3
-rw-r--r--libbcachefs/recovery.c52
-rw-r--r--libbcachefs/recovery.h2
-rw-r--r--libbcachefs/recovery_passes.c26
-rw-r--r--libbcachefs/recovery_passes.h10
-rw-r--r--libbcachefs/sb-downgrade.c4
-rw-r--r--libbcachefs/sb-errors_format.h6
-rw-r--r--libbcachefs/sb-members.c8
-rw-r--r--libbcachefs/sb-members.h13
-rw-r--r--libbcachefs/snapshot.c4
-rw-r--r--libbcachefs/str_hash.h5
-rw-r--r--libbcachefs/subvolume.c6
-rw-r--r--libbcachefs/super-io.c3
-rw-r--r--libbcachefs/super.c136
-rw-r--r--libbcachefs/super.h1
-rw-r--r--libbcachefs/tests.c4
-rw-r--r--libbcachefs/util.c13
43 files changed, 795 insertions, 451 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 5fe33c98..a5b138f7 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-c9d875f9be1f853e747c9e00421c678b0adf73d2
+6e73711dc3556f90eefa12d6cc7547d4b0eba5dc
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 37cbbf86..002e3853 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -484,7 +484,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
if (ret)
return ERR_PTR(ret);
- ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
+ ret = bch2_trans_update_ip(trans, &iter, &a->k_i, flags, _RET_IP_);
bch2_trans_iter_exit(trans, &iter);
return unlikely(ret) ? ERR_PTR(ret) : a;
}
@@ -2393,14 +2393,16 @@ bkey_err:
int bch2_fs_freespace_init(struct bch_fs *c)
{
- int ret = 0;
- bool doing_init = false;
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image))
+ return 0;
+
/*
* We can crash during the device add path, so we need to check this on
* every mount:
*/
+ bool doing_init = false;
for_each_member_device(c, ca) {
if (ca->mi.freespace_initialized)
continue;
@@ -2410,7 +2412,7 @@ int bch2_fs_freespace_init(struct bch_fs *c)
doing_init = true;
}
- ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
+ int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
if (ret) {
bch2_dev_put(ca);
bch_err_fn(c, ret);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index ac1244e7..a0f92daa 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -1339,6 +1339,8 @@ alloc_done:
open_bucket_for_each(c, &req->wp->ptrs, ob, i)
req->wp->sectors_free = min(req->wp->sectors_free, ob->sectors_free);
+ req->wp->sectors_free = rounddown(req->wp->sectors_free, block_sectors(c));
+
BUG_ON(!req->wp->sectors_free || req->wp->sectors_free == UINT_MAX);
return 0;
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index 64e1f1ef..19220341 100644
--- a/libbcachefs/alloc_foreground.h
+++ b/libbcachefs/alloc_foreground.h
@@ -157,7 +157,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ
unsigned i;
open_bucket_for_each(c, &wp->ptrs, ob, i)
- ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+ ob_push(c, ob->sectors_free < block_sectors(c)
+ ? &ptrs
+ : &keep, ob);
wp->ptrs = keep;
mutex_unlock(&wp->lock);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 675b8f8e..5fb8b0f4 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -295,6 +295,16 @@ do { \
bch2_print(_c, __VA_ARGS__); \
} while (0)
+#define bch2_print_str_ratelimited(_c, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ \
+ if (__ratelimit(&_rs)) \
+ bch2_print_str(_c, __VA_ARGS__); \
+} while (0)
+
#define bch_info(c, fmt, ...) \
bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_info_ratelimited(c, fmt, ...) \
@@ -834,6 +844,7 @@ struct bch_fs {
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
+ u64 recovery_passes_required;
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
u64 btrees_lost_data;
} sb;
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 9db8f5f8..b4332338 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -350,20 +350,13 @@ again:
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
- if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
- trans, btree_node_read_error,
- "Topology repair: unreadable btree node at\n%s",
- buf.buf)) {
+ if (bch2_err_matches(ret, EIO)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
if (ret)
break;
-
- ret = bch2_btree_lost_data(c, b->c.btree_id);
- if (ret)
- break;
continue;
}
@@ -525,9 +518,6 @@ int bch2_check_topology(struct bch_fs *c)
bch2_btree_id_to_text(&buf, i);
if (r->error) {
- ret = bch2_btree_lost_data(c, i);
- if (ret)
- break;
reconstruct_root:
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 5cd27a9c..aec397b4 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -515,19 +515,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca,
+ bool print_pos,
struct btree *b, struct bset *i, struct bkey_packed *k,
- unsigned offset, int write)
+ unsigned offset, int rw)
{
- prt_printf(out, bch2_log_msg(c, "%s"),
- write == READ
- ? "error validating btree node "
- : "corrupt btree node before write ");
+ if (print_pos) {
+ prt_str(out, rw == READ
+ ? "error validating btree node "
+ : "corrupt btree node before write ");
+ prt_printf(out, "at btree ");
+ bch2_btree_pos_to_text(out, c, b);
+ prt_newline(out);
+ }
+
if (ca)
- prt_printf(out, "on %s ", ca->name);
- prt_printf(out, "at btree ");
- bch2_btree_pos_to_text(out, c, b);
+ prt_printf(out, "%s ", ca->name);
- prt_printf(out, "\nnode offset %u/%u",
+ prt_printf(out, "node offset %u/%u",
b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
if (i)
prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
@@ -538,75 +542,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
prt_str(out, ": ");
}
-__printf(10, 11)
+__printf(11, 12)
static int __btree_err(int ret,
struct bch_fs *c,
struct bch_dev *ca,
struct btree *b,
struct bset *i,
struct bkey_packed *k,
- int write,
- bool have_retry,
+ int rw,
enum bch_sb_error_id err_type,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg,
const char *fmt, ...)
{
- bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
+ if (c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
+ return -BCH_ERR_fsck_fix;
+
+ bool have_retry = false;
+ int ret2;
+
+ if (ca) {
+ bch2_mark_btree_validate_failure(failed, ca->dev_idx);
+
+ struct extent_ptr_decoded pick;
+ have_retry = !bch2_bkey_pick_read_device(c,
+ bkey_i_to_s_c(&b->key),
+ failed, &pick, -1);
+ }
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
ret = -BCH_ERR_btree_node_read_err_fixable;
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
ret = -BCH_ERR_btree_node_read_err_bad_node;
- if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
- bch2_sb_error_count(c, err_type);
+ bch2_sb_error_count(c, err_type);
+
+ bool print_deferred = err_msg &&
+ rw == READ &&
+ !(test_bit(BCH_FS_fsck_running, &c->flags) &&
+ c->opts.fix_errors == FSCK_FIX_ask);
struct printbuf out = PRINTBUF;
- if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
- printbuf_indent_add_nextline(&out, 2);
-#ifdef BCACHEFS_LOG_PREFIX
- prt_printf(&out, bch2_log_msg(c, ""));
-#endif
- }
+ bch2_log_msg_start(c, &out);
+
+ if (!print_deferred)
+ err_msg = &out;
- btree_err_msg(&out, c, ca, b, i, k, b->written, write);
+ btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw);
va_list args;
va_start(args, fmt);
- prt_vprintf(&out, fmt, args);
+ prt_vprintf(err_msg, fmt, args);
va_end(args);
- if (write == WRITE) {
+ if (print_deferred) {
+ prt_newline(err_msg);
+
+ switch (ret) {
+ case -BCH_ERR_btree_node_read_err_fixable:
+ ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type);
+ if (ret2 != -BCH_ERR_fsck_fix &&
+ ret2 != -BCH_ERR_fsck_ignore) {
+ ret = ret2;
+ goto fsck_err;
+ }
+
+ if (!have_retry)
+ ret = -BCH_ERR_fsck_fix;
+ goto out;
+ case -BCH_ERR_btree_node_read_err_bad_node:
+ prt_str(&out, ", ");
+ ret = __bch2_topology_error(c, &out);
+ break;
+ }
+
+ goto out;
+ }
+
+ if (rw == WRITE) {
prt_str(&out, ", ");
ret = __bch2_inconsistent_error(c, &out)
? -BCH_ERR_fsck_errors_not_fixed
: 0;
- silent = false;
+ goto print;
}
switch (ret) {
case -BCH_ERR_btree_node_read_err_fixable:
- ret = !silent
- ? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf)
- : -BCH_ERR_fsck_fix;
- if (ret != -BCH_ERR_fsck_fix &&
- ret != -BCH_ERR_fsck_ignore)
+ ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf);
+ if (ret2 != -BCH_ERR_fsck_fix &&
+ ret2 != -BCH_ERR_fsck_ignore) {
+ ret = ret2;
goto fsck_err;
- ret = -BCH_ERR_fsck_fix;
+ }
+
+ if (!have_retry)
+ ret = -BCH_ERR_fsck_fix;
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
ret = __bch2_topology_error(c, &out);
- if (ret)
- silent = false;
- break;
- case -BCH_ERR_btree_node_read_err_incompatible:
- ret = -BCH_ERR_fsck_errors_not_fixed;
- silent = false;
break;
}
-
- if (!silent)
- bch2_print_str(c, KERN_ERR, out.buf);
+print:
+ bch2_print_str(c, KERN_ERR, out.buf);
out:
fsck_err:
printbuf_exit(&out);
@@ -615,8 +654,9 @@ fsck_err:
#define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \
({ \
- int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \
+ int _ret = __btree_err(type, c, ca, b, i, k, write, \
BCH_FSCK_ERR_##_err_type, \
+ failed, err_msg, \
msg, ##__VA_ARGS__); \
\
if (_ret != -BCH_ERR_fsck_fix) { \
@@ -624,7 +664,7 @@ fsck_err:
goto fsck_err; \
} \
\
- *saw_error = true; \
+ true; \
})
#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
@@ -682,8 +722,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
struct btree *b, struct bset *i,
- unsigned offset, unsigned sectors,
- int write, bool have_retry, bool *saw_error)
+ unsigned offset, unsigned sectors, int write,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
@@ -896,7 +937,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b,
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
struct bset *i, int write,
- bool have_retry, bool *saw_error)
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
struct bkey_packed *k, *prev = NULL;
@@ -1009,7 +1051,9 @@ fsck_err:
}
int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
- struct btree *b, bool have_retry, bool *saw_error)
+ struct btree *b,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
struct btree_node_entry *bne;
struct sort_iter *iter;
@@ -1022,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
u64 max_journal_seq = 0;
struct printbuf buf = PRINTBUF;
- int ret = 0, retry_read = 0, write = READ;
+ int ret = 0, write = READ;
u64 start_time = local_clock();
b->version_ondisk = U16_MAX;
@@ -1156,15 +1200,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
b->version_ondisk = min(b->version_ondisk,
le16_to_cpu(i->version));
- ret = validate_bset(c, ca, b, i, b->written, sectors,
- READ, have_retry, saw_error);
+ ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg);
if (ret)
goto fsck_err;
if (!b->written)
btree_node_set_format(b, b->data->format);
- ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
+ ret = validate_bset_keys(c, b, i, READ, failed, err_msg);
if (ret)
goto fsck_err;
@@ -1292,20 +1335,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (!ptr_written)
set_btree_node_need_rewrite(b);
-out:
+fsck_err:
mempool_free(iter, &c->fill_iter);
printbuf_exit(&buf);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
- return retry_read;
-fsck_err:
- if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
- ret == -BCH_ERR_btree_node_read_err_must_retry) {
- retry_read = 1;
- } else {
- set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
- }
- goto out;
+ return ret;
}
static void btree_node_read_work(struct work_struct *work)
@@ -1317,15 +1351,25 @@ static void btree_node_read_work(struct work_struct *work)
struct btree *b = rb->b;
struct bio *bio = &rb->bio;
struct bch_io_failures failed = { .nr = 0 };
+ int ret = 0;
+
struct printbuf buf = PRINTBUF;
- bool saw_error = false;
- bool retry = false;
- bool can_retry;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "btree node read error at btree ");
+ bch2_btree_pos_to_text(&buf, c, b);
+ prt_newline(&buf);
goto start;
while (1) {
- retry = true;
- bch_info(c, "retrying read");
+ ret = bch2_bkey_pick_read_device(c,
+ bkey_i_to_s_c(&b->key),
+ &failed, &rb->pick, -1);
+ if (ret) {
+ set_btree_node_read_error(b);
+ break;
+ }
+
ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = ca != NULL;
rb->start_time = local_clock();
@@ -1343,60 +1387,59 @@ static void btree_node_read_work(struct work_struct *work)
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
rb->start_time, !bio->bi_status);
start:
- printbuf_reset(&buf);
- bch2_btree_pos_to_text(&buf, c, b);
-
- if (ca && bio->bi_status)
- bch_err_dev_ratelimited(ca,
- "btree read error %s for %s",
- bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = false;
- bch2_mark_io_failure(&failed, &rb->pick, false);
-
- can_retry = bch2_bkey_pick_read_device(c,
- bkey_i_to_s_c(&b->key),
- &failed, &rb->pick, -1) > 0;
-
- if (!bio->bi_status &&
- !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
- if (retry)
- bch_info(c, "retry success");
- break;
+ if (bio->bi_status) {
+ bch2_mark_io_failure(&failed, &rb->pick, false);
+ continue;
}
- saw_error = true;
+ ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
+ if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
+ ret == -BCH_ERR_btree_node_read_err_must_retry)
+ continue;
- if (!can_retry) {
+ if (ret)
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
- break;
- }
+
+ break;
}
- async_object_list_del(c, btree_read_bio, rb->list_idx);
- bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
- rb->start_time);
- bio_put(&rb->bio);
+ bch2_io_failures_to_text(&buf, c, &failed);
- if ((saw_error ||
+ if (btree_node_read_error(b))
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
+
+ /*
+ * only print retry success if we read from a replica with no errors
+ */
+ if (btree_node_read_error(b))
+ prt_printf(&buf, "ret %s", bch2_err_str(ret));
+ else if (failed.nr) {
+ if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
+ prt_printf(&buf, "retry success");
+ else
+ prt_printf(&buf, "repair success");
+ }
+
+ if ((failed.nr ||
btree_node_need_rewrite(b)) &&
!btree_node_read_error(b) &&
c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
- if (saw_error) {
- printbuf_reset(&buf);
- bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_str(&buf, " ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s",
- __func__, buf.buf);
- }
-
+ prt_printf(&buf, " (rewriting node)");
bch2_btree_node_rewrite_async(c, b);
}
+ prt_newline(&buf);
+
+ if (failed.nr)
+ bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
+ async_object_list_del(c, btree_read_bio, rb->list_idx);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+ rb->start_time);
+ bio_put(&rb->bio);
printbuf_exit(&buf);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
@@ -1480,12 +1523,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
struct btree *b = ra->b;
struct printbuf buf = PRINTBUF;
bool dump_bset_maps = false;
- bool have_retry = false;
int ret = 0, best = -1, write = READ;
unsigned i, written = 0, written2 = 0;
__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
bool _saw_error = false, *saw_error = &_saw_error;
+ struct printbuf *err_msg = NULL;
+ struct bch_io_failures *failed = NULL;
for (i = 0; i < ra->nr; i++) {
struct btree_node *bn = ra->buf[i];
@@ -1578,14 +1622,19 @@ fsck_err:
if (best >= 0) {
memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
- ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
+ ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL);
} else {
ret = -1;
}
if (ret) {
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
+
+ struct printbuf buf = PRINTBUF;
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
+ if (buf.pos)
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
} else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
@@ -1718,6 +1767,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
prt_str(&buf, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&buf, c, b);
+ prt_newline(&buf);
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
bch_err_ratelimited(c, "%s", buf.buf);
if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
@@ -1725,7 +1776,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
bch2_fatal_error(c);
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
printbuf_exit(&buf);
@@ -2194,8 +2244,6 @@ static void btree_node_write_endio(struct bio *bio)
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors)
{
- bool saw_error;
-
int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
(struct bkey_validate_context) {
.from = BKEY_VALIDATE_btree_node,
@@ -2208,8 +2256,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
return ret;
}
- ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?:
- validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
+ ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?:
+ validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL);
if (ret) {
bch2_inconsistent_error(c);
dump_stack();
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index afdb11a9..30a51805 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -134,7 +134,9 @@ void bch2_btree_build_aux_trees(struct btree *);
void bch2_btree_init_next(struct btree_trans *, struct btree *);
int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
- struct btree *, bool, bool *);
+ struct btree *,
+ struct bch_io_failures *,
+ struct printbuf *);
void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index c824f1b6..9d941619 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
struct bpos end)
{
if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
- !bkey_eq(iter->pos, POS_MAX)) {
+ !bkey_eq(iter->pos, POS_MAX) &&
+ !((iter->flags & BTREE_ITER_is_extents) &&
+ iter->pos.offset == U64_MAX)) {
+
/*
* bkey_start_pos(), for extents, is not monotonically
* increasing until after filtering for snapshots:
@@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify_entry_exit(iter);
- EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+ EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode);
int ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
@@ -3123,6 +3126,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (new_bytes > s->max_mem) {
+ mutex_lock(&s->lock);
#ifdef CONFIG_BCACHEFS_DEBUG
darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
@@ -3134,6 +3138,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
s->trans_kmalloc_trace.nr);
#endif
s->max_mem = new_bytes;
+ mutex_unlock(&s->lock);
}
if (trans->used_mempool) {
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index 2bffd512..ce83cd03 100644
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -511,8 +511,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
return 0;
}
-int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
- struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+ unsigned long ip)
{
kmsan_check_memory(k, bkey_bytes(&k->k));
@@ -548,7 +549,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
path_idx = iter->key_cache_path;
}
- return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_);
+ return bch2_trans_update_by_path(trans, path_idx, k, flags, ip);
}
int bch2_btree_insert_clone_trans(struct btree_trans *trans,
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index e674419c..62d24b08 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -102,8 +102,16 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *
int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
enum btree_id, struct bpos);
-int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
- struct bkey_i *, enum btree_iter_update_trigger_flags);
+int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, enum btree_iter_update_trigger_flags,
+ unsigned long);
+
+static inline int __must_check
+bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+{
+ return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_);
+}
struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 7c267244..596edc7b 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -399,7 +399,7 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf
bool print = __bch2_count_fsck_err(c, id, buf);
- int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf,
+ int ret = bch2_run_explicit_recovery_pass_persistent(c, buf,
BCH_RECOVERY_PASS_check_allocations);
if (insert) {
@@ -599,6 +599,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
}
struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
+ if (!bucket_valid(ca, bucket.offset)) {
+ if (insert) {
+ bch2_dev_bucket_missing(ca, bucket.offset);
+ ret = -BCH_ERR_trigger_pointer;
+ }
+ goto err;
+ }
if (flags & BTREE_TRIGGER_transactional) {
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
@@ -965,7 +972,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
- bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+ bch2_run_explicit_recovery_pass_persistent(c, &buf,
BCH_RECOVERY_PASS_check_allocations);
if (print)
@@ -1310,13 +1317,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
if (resize) {
- bucket_gens->nbuckets = min(bucket_gens->nbuckets,
- old_bucket_gens->nbuckets);
- bucket_gens->nbuckets_minus_first =
- bucket_gens->nbuckets - bucket_gens->first_bucket;
+ u64 copy = min(bucket_gens->nbuckets,
+ old_bucket_gens->nbuckets);
memcpy(bucket_gens->b,
old_bucket_gens->b,
- bucket_gens->nbuckets);
+ sizeof(bucket_gens->b[0]) * copy);
}
rcu_assign_pointer(ca->bucket_gens, bucket_gens);
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index c24de2e3..8b8800ad 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -42,7 +42,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
struct btree_node *n_sorted = c->verify_data->data;
struct bset *sorted, *inmemory = &b->data->keys;
struct bio *bio;
- bool failed = false, saw_error = false;
+ bool failed = false;
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_btree_verify_replicas);
@@ -66,7 +66,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
v->written = 0;
- if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
+ if (bch2_btree_node_read_done(c, ca, v, NULL, NULL))
return false;
n_sorted = c->verify_data->data;
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 8488a757..b012b9dd 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -13,8 +13,8 @@
#include <linux/dcache.h>
-static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
- const struct qstr *str, struct qstr *out_cf)
+int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
@@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *
#endif
}
-static inline int bch2_maybe_casefold(struct btree_trans *trans,
- const struct bch_hash_info *info,
- const struct qstr *str, struct qstr *out_cf)
-{
- if (likely(!info->cf_encoding)) {
- *out_cf = *str;
- return 0;
- } else {
- return bch2_casefold(trans, info, str, out_cf);
- }
-}
-
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
@@ -224,12 +212,19 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
struct qstr d_name = bch2_dirent_get_name(d);
- prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
+ prt_printf(out, "%.*s", d_name.len, d_name.name);
+
+ if (d.v->d_casefold) {
+ struct qstr d_name = bch2_dirent_get_lookup_name(d);
+ prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name);
+ }
+
+ prt_str(out, " ->");
if (d.v->d_type != DT_SUBVOL)
- prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
+ prt_printf(out, " %llu", le64_to_cpu(d.v->d_inum));
else
- prt_printf(out, "%u -> %u",
+ prt_printf(out, " %u -> %u",
le32_to_cpu(d.v->d_parent_subvol),
le32_to_cpu(d.v->d_child_subvol));
diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h
index 0880772b..9838a7ba 100644
--- a/libbcachefs/dirent.h
+++ b/libbcachefs/dirent.h
@@ -23,6 +23,21 @@ struct bch_fs;
struct bch_hash_info;
struct bch_inode_info;
+int bch2_casefold(struct btree_trans *, const struct bch_hash_info *,
+ const struct qstr *, struct qstr *);
+
+static inline int bch2_maybe_casefold(struct btree_trans *trans,
+ const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
+{
+ if (likely(!info->cf_encoding)) {
+ *out_cf = *str;
+ return 0;
+ } else {
+ return bch2_casefold(trans, info, str, out_cf);
+ }
+}
+
struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);
static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 83ed5a63..13e28f9a 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -175,6 +175,7 @@
x(0, backpointer_to_overwritten_btree_node) \
x(0, journal_reclaim_would_deadlock) \
x(EINVAL, fsck) \
+ x(BCH_ERR_fsck, fsck_ask) \
x(BCH_ERR_fsck, fsck_fix) \
x(BCH_ERR_fsck, fsck_delete_bkey) \
x(BCH_ERR_fsck, fsck_ignore) \
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 91a65a20..dc3f99dc 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -104,7 +104,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
__bch2_inconsistent_error(c, out);
return -BCH_ERR_btree_need_topology_repair;
} else {
- return bch2_run_explicit_recovery_pass_printbuf(c, out, BCH_RECOVERY_PASS_check_topology) ?:
+ return bch2_run_explicit_recovery_pass_persistent(c, out, BCH_RECOVERY_PASS_check_topology) ?:
-BCH_ERR_btree_node_read_validate_error;
}
}
@@ -393,6 +393,48 @@ bool __bch2_count_fsck_err(struct bch_fs *c,
return print && !repeat;
}
+int bch2_fsck_err_opt(struct bch_fs *c,
+ enum bch_fsck_flags flags,
+ enum bch_sb_error_id err)
+{
+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+ flags |= fsck_flags_extra[err];
+
+ if (test_bit(BCH_FS_fsck_running, &c->flags)) {
+ if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
+ return -BCH_ERR_fsck_repair_unimplemented;
+
+ switch (c->opts.fix_errors) {
+ case FSCK_FIX_exit:
+ return -BCH_ERR_fsck_errors_not_fixed;
+ case FSCK_FIX_yes:
+ if (flags & FSCK_CAN_FIX)
+ return -BCH_ERR_fsck_fix;
+ fallthrough;
+ case FSCK_FIX_no:
+ if (flags & FSCK_CAN_IGNORE)
+ return -BCH_ERR_fsck_ignore;
+ return -BCH_ERR_fsck_errors_not_fixed;
+ case FSCK_FIX_ask:
+ if (flags & FSCK_AUTOFIX)
+ return -BCH_ERR_fsck_fix;
+ return -BCH_ERR_fsck_ask;
+ default:
+ BUG();
+ }
+ } else {
+ if ((flags & FSCK_AUTOFIX) &&
+ (c->opts.errors == BCH_ON_ERROR_continue ||
+ c->opts.errors == BCH_ON_ERROR_fix_safe))
+ return -BCH_ERR_fsck_fix;
+
+ if (c->opts.errors == BCH_ON_ERROR_continue &&
+ (flags & FSCK_CAN_IGNORE))
+ return -BCH_ERR_fsck_ignore;
+ return -BCH_ERR_fsck_errors_not_fixed;
+ }
+}
+
int __bch2_fsck_err(struct bch_fs *c,
struct btree_trans *trans,
enum bch_fsck_flags flags,
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
index 0b3ede1c..d89dd270 100644
--- a/libbcachefs/error.h
+++ b/libbcachefs/error.h
@@ -80,6 +80,10 @@ bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbu
#define bch2_count_fsck_err(_c, _err, ...) \
__bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__)
+int bch2_fsck_err_opt(struct bch_fs *,
+ enum bch_fsck_flags,
+ enum bch_sb_error_id);
+
__printf(5, 6) __cold
int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
enum bch_fsck_flags,
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 7058a343..18506061 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -45,6 +45,49 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
struct bch_extent_crc_unpacked,
enum bch_extent_entry_type);
+void bch2_io_failures_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_io_failures *failed)
+{
+ static const char * const error_types[] = {
+ "io", "checksum", "ec reconstruct", NULL
+ };
+
+ for (struct bch_dev_io_failures *f = failed->devs;
+ f < failed->devs + failed->nr;
+ f++) {
+ unsigned errflags =
+ ((!!f->failed_io) << 0) |
+ ((!!f->failed_csum_nr) << 1) |
+ ((!!f->failed_ec) << 2);
+
+ if (!errflags)
+ continue;
+
+ bch2_printbuf_make_room(out, 1024);
+ rcu_read_lock();
+ out->atomic++;
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev);
+ if (ca)
+ prt_str(out, ca->name);
+ else
+ prt_printf(out, "(invalid device %u)", f->dev);
+ --out->atomic;
+ rcu_read_unlock();
+
+ prt_char(out, ' ');
+
+ if (is_power_of_2(errflags)) {
+ prt_bitflags(out, error_types, errflags);
+ prt_str(out, " error");
+ } else {
+ prt_str(out, "errors: ");
+ prt_bitflags(out, error_types, errflags);
+ }
+ prt_newline(out);
+ }
+}
+
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
unsigned dev)
{
@@ -79,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
f->failed_csum_nr++;
}
+void bch2_mark_btree_validate_failure(struct bch_io_failures *failed,
+ unsigned dev)
+{
+ struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, dev);
+
+ if (!f) {
+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+ f = &failed->devs[failed->nr++];
+ memset(f, 0, sizeof(*f));
+ f->dev = dev;
+ }
+
+ f->failed_btree_validate = true;
+}
+
static inline u64 dev_latency(struct bch_dev *ca)
{
return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX;
@@ -179,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
have_io_errors |= f->failed_io;
+ have_io_errors |= f->failed_btree_validate;
have_io_errors |= f->failed_ec;
}
have_csum_errors |= !!f->failed_csum_nr;
@@ -186,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (p.has_ec && (f->failed_io || f->failed_csum_nr))
p.do_ec_reconstruct = true;
else if (f->failed_io ||
+ f->failed_btree_validate ||
f->failed_csum_nr > c->opts.checksum_err_retry_nr)
continue;
}
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index e78a39e7..f25be08d 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -399,10 +399,13 @@ out: \
/* utility code common to all keys with pointers: */
+void bch2_io_failures_to_text(struct printbuf *, struct bch_fs *,
+ struct bch_io_failures *);
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
unsigned);
void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *, bool);
+void bch2_mark_btree_validate_failure(struct bch_io_failures *, unsigned);
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
struct extent_ptr_decoded *, int);
diff --git a/libbcachefs/extents_types.h b/libbcachefs/extents_types.h
index e51529dc..b23ce4a3 100644
--- a/libbcachefs/extents_types.h
+++ b/libbcachefs/extents_types.h
@@ -34,6 +34,7 @@ struct bch_io_failures {
u8 dev;
unsigned failed_csum_nr:6,
failed_io:1,
+ failed_btree_validate:1,
failed_ec:1;
} devs[BCH_REPLICAS_MAX + 1];
};
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 0422dcab..dea63ede 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -53,7 +53,7 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
struct bch_subvolume *);
/* Set VFS inode flags from bcachefs inode: */
-static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
+static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode)
{
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
[__BCH_INODE_sync] = S_SYNC,
@@ -64,8 +64,10 @@ static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
- if (inode->ei_inode.bi_casefold)
+ if (bch2_inode_casefold(c, &inode->ei_inode))
inode->v.i_flags |= S_CASEFOLD;
+ else
+ inode->v.i_flags &= ~S_CASEFOLD;
}
void bch2_inode_update_after_write(struct btree_trans *trans,
@@ -96,7 +98,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans,
inode->ei_inode = *bi;
- bch2_inode_flags_to_vfs(inode);
+ bch2_inode_flags_to_vfs(c, inode);
}
int __must_check bch2_write_inode(struct bch_fs *c,
@@ -647,13 +649,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
const struct qstr *name)
{
struct bch_fs *c = trans->c;
- struct btree_iter dirent_iter = {};
subvol_inum inum = {};
struct printbuf buf = PRINTBUF;
+ struct qstr lookup_name;
+ int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name);
+ if (ret)
+ return ERR_PTR(ret);
+
+ struct btree_iter dirent_iter = {};
struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
- dir_hash_info, dir, name, 0);
- int ret = bkey_err(k);
+ dir_hash_info, dir, &lookup_name, 0);
+ ret = bkey_err(k);
if (ret)
return ERR_PTR(ret);
@@ -841,6 +848,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
*/
set_nlink(&inode->v, 0);
}
+
+ if (IS_CASEFOLDED(vdir))
+ d_invalidate(dentry);
err:
bch2_trans_put(trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
@@ -1251,10 +1261,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap,
return finish_open_simple(file, 0);
}
+struct bch_fiemap_extent {
+ struct bkey_buf kbuf;
+ unsigned flags;
+};
+
static int bch2_fill_extent(struct bch_fs *c,
struct fiemap_extent_info *info,
- struct bkey_s_c k, unsigned flags)
+ struct bch_fiemap_extent *fe)
{
+ struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k);
+ unsigned flags = fe->flags;
+
+ BUG_ON(!k.k->size);
+
if (bkey_extent_is_direct_data(k.k)) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -1307,110 +1327,223 @@ static int bch2_fill_extent(struct bch_fs *c,
}
}
-static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
- u64 start, u64 len)
+/*
+ * Scan a range of an inode for data in pagecache.
+ *
+ * Intended to be retryable, so don't modify the output params until success is
+ * imminent.
+ */
+static int
+bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end,
+ bool nonblock)
{
- struct bch_fs *c = vinode->i_sb->s_fs_info;
- struct bch_inode_info *ei = to_bch_ei(vinode);
- struct btree_trans *trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_buf cur, prev;
- bool have_extent = false;
- int ret = 0;
+ loff_t dstart, dend;
- ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
- if (ret)
+ dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock);
+ if (dstart < 0)
+ return dstart;
+
+ if (dstart == *end) {
+ *start = dstart;
+ return 0;
+ }
+
+ dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock);
+ if (dend < 0)
+ return dend;
+
+ /* race */
+ BUG_ON(dstart == dend);
+
+ *start = dstart;
+ *end = dend;
+ return 0;
+}
+
+/*
+ * Scan a range of pagecache that corresponds to a file mapping hole in the
+ * extent btree. If data is found, fake up an extent key so it looks like a
+ * delalloc extent to the rest of the fiemap processing code.
+ */
+static int
+bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode,
+ u64 start, u64 end, struct bch_fiemap_extent *cur)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i_extent *delextent;
+ struct bch_extent_ptr ptr = {};
+ loff_t dstart = start << 9, dend = end << 9;
+ int ret;
+
+ /*
+ * We hold btree locks here so we cannot block on folio locks without
+ * dropping trans locks first. Run a nonblocking scan for the common
+ * case of no folios over holes and fall back on failure.
+ *
+ * Note that dropping locks like this is technically racy against
+ * writeback inserting to the extent tree, but a non-sync fiemap scan is
+ * fundamentally racy with writeback anyways. Therefore, just report the
+ * range as delalloc regardless of whether we have to cycle trans locks.
+ */
+ ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true);
+ if (ret == -EAGAIN)
+ ret = drop_locks_do(trans,
+ bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false));
+ if (ret < 0)
return ret;
- struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
- if (start + len < start)
- return -EINVAL;
+ /*
+ * Create a fake extent key in the buffer. We have to add a dummy extent
+ * pointer for the fill code to add an extent entry. It's explicitly
+ * zeroed to reflect delayed allocation (i.e. phys offset 0).
+ */
+ bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64));
+ delextent = bkey_extent_init(cur->kbuf.k);
+ delextent->k.p = POS(inode->ei_inum.inum, dend >> 9);
+ delextent->k.size = (dend - dstart) >> 9;
+ bch2_bkey_append_ptr(&delextent->k_i, ptr);
- start >>= 9;
+ cur->flags = FIEMAP_EXTENT_DELALLOC;
- bch2_bkey_buf_init(&cur);
- bch2_bkey_buf_init(&prev);
- trans = bch2_trans_get(c);
+ return 0;
+}
+static int bch2_next_fiemap_extent(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ u64 start, u64 end,
+ struct bch_fiemap_extent *cur)
+{
+ u32 snapshot;
+ int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);
+ if (ret)
+ return ret;
+
+ struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
- POS(ei->v.i_ino, start), 0);
+ SPOS(inode->ei_inum.inum, start, snapshot), 0);
- while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
- enum btree_id data_btree = BTREE_ID_extents;
+ struct bkey_s_c k =
+ bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end));
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
- bch2_trans_begin(trans);
+ ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur);
+ if (ret)
+ goto err;
- u32 snapshot;
- ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
- if (ret)
- continue;
+ struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k);
- bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
+ /*
+ * Does the pagecache or the btree take precedence?
+ *
+ * It _should_ be the pagecache, so that we correctly report delalloc
+ * extents when dirty in the pagecache (we're COW, after all).
+ *
+ * But we'd have to add per-sector writeback tracking to
+ * bch_folio_state, otherwise we report delalloc extents for clean
+ * cached data in the pagecache.
+ *
+ * We should do this, but even then fiemap won't report stable mappings:
+ * on bcachefs data moves around in the background (copygc, rebalance)
+ * and we don't provide a way for userspace to lock that out.
+ */
+ if (k.k &&
+ bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)),
+ pagecache_start)) {
+ bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k);
+ bch2_cut_front(iter.pos, cur->kbuf.k);
+ bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k);
+ cur->flags = 0;
+ } else if (k.k) {
+ bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k);
+ }
- k = bch2_btree_iter_peek_max(trans, &iter, end);
- ret = bkey_err(k);
+ if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) {
+ unsigned sectors = cur->kbuf.k->k.size;
+ s64 offset_into_extent = 0;
+ enum btree_id data_btree = BTREE_ID_extents;
+ ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent,
+ &cur->kbuf);
if (ret)
- continue;
+ goto err;
- if (!k.k)
- break;
+ struct bkey_i *k = cur->kbuf.k;
+ sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent);
- if (!bkey_extent_is_data(k.k) &&
- k.k->type != KEY_TYPE_reservation) {
- bch2_btree_iter_advance(trans, &iter);
- continue;
- }
+ bch2_cut_front(POS(k->k.p.inode,
+ bkey_start_offset(&k->k) + offset_into_extent),
+ k);
+ bch2_key_resize(&k->k, sectors);
+ k->k.p = iter.pos;
+ k->k.p.offset += k->k.size;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
- s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
- unsigned sectors = k.k->size - offset_into_extent;
+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
+ u64 start, u64 len)
+{
+ struct bch_fs *c = vinode->i_sb->s_fs_info;
+ struct bch_inode_info *ei = to_bch_ei(vinode);
+ struct btree_trans *trans;
+ struct bch_fiemap_extent cur, prev;
+ int ret = 0;
+
+ ret = fiemap_prep(&ei->v, info, start, &len, 0);
+ if (ret)
+ return ret;
+
+ if (start + len < start)
+ return -EINVAL;
+
+ start >>= 9;
+ u64 end = (start + len) >> 9;
- bch2_bkey_buf_reassemble(&cur, c, k);
+ bch2_bkey_buf_init(&cur.kbuf);
+ bch2_bkey_buf_init(&prev.kbuf);
+ bkey_init(&prev.kbuf.k->k);
- ret = bch2_read_indirect_extent(trans, &data_btree,
- &offset_into_extent, &cur);
+ trans = bch2_trans_get(c);
+
+ while (start < end) {
+ ret = lockrestart_do(trans,
+ bch2_next_fiemap_extent(trans, ei, start, end, &cur));
if (ret)
- continue;
+ goto err;
- k = bkey_i_to_s_c(cur.k);
- bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
+ BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start);
+ BUG_ON(cur.kbuf.k->k.p.offset > end);
- sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);
+ if (bkey_start_offset(&cur.kbuf.k->k) == end)
+ break;
- bch2_cut_front(POS(k.k->p.inode,
- bkey_start_offset(k.k) +
- offset_into_extent),
- cur.k);
- bch2_key_resize(&cur.k->k, sectors);
- cur.k->k.p = iter.pos;
- cur.k->k.p.offset += cur.k->k.size;
+ start = cur.kbuf.k->k.p.offset;
- if (have_extent) {
+ if (!bkey_deleted(&prev.kbuf.k->k)) {
bch2_trans_unlock(trans);
- ret = bch2_fill_extent(c, info,
- bkey_i_to_s_c(prev.k), 0);
+ ret = bch2_fill_extent(c, info, &prev);
if (ret)
- break;
+ goto err;
}
- bkey_copy(prev.k, cur.k);
- have_extent = true;
-
- bch2_btree_iter_set_pos(trans, &iter,
- POS(iter.pos.inode, iter.pos.offset + sectors));
+ bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k);
+ prev.flags = cur.flags;
}
- bch2_trans_iter_exit(trans, &iter);
- if (!ret && have_extent) {
+ if (!bkey_deleted(&prev.kbuf.k->k)) {
bch2_trans_unlock(trans);
- ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
- FIEMAP_EXTENT_LAST);
+ prev.flags |= FIEMAP_EXTENT_LAST;
+ ret = bch2_fill_extent(c, info, &prev);
}
-
+err:
bch2_trans_put(trans);
- bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
- return ret < 0 ? ret : 0;
+ bch2_bkey_buf_exit(&cur.kbuf, c);
+ bch2_bkey_buf_exit(&prev.kbuf, c);
+
+ return bch2_err_class(ret < 0 ? ret : 0);
}
static const struct vm_operations_struct bch_vm_ops = {
@@ -1487,13 +1620,14 @@ static int bch2_fileattr_get(struct dentry *dentry,
struct fileattr *fa)
{
struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags));
if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
- if (inode->ei_inode.bi_casefold)
+ if (bch2_inode_casefold(c, &inode->ei_inode))
fa->flags |= FS_CASEFOLD_FL;
fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ];
@@ -1526,7 +1660,7 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
(s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags)
return -EINVAL;
- if (s->casefold != bi->bi_casefold) {
+ if (s->casefold != bch2_inode_casefold(c, bi)) {
#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
@@ -1547,9 +1681,8 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
bch2_check_set_feature(c, BCH_FEATURE_casefolding);
- bi->bi_casefold = s->casefold;
- bi->bi_fields_set &= ~BIT(Inode_opt_casefold);
- bi->bi_fields_set |= s->casefold << Inode_opt_casefold;
+ bi->bi_casefold = s->casefold + 1;
+ bi->bi_fields_set |= BIT(Inode_opt_casefold);
#else
printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
@@ -2445,6 +2578,11 @@ got_sb:
if (ret)
goto err_put_super;
+#ifdef CONFIG_UNICODE
+ sb->s_encoding = c->cf_encoding;
+#endif
+ generic_set_sb_d_ops(sb);
+
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
bch_err_msg(c, ret, "mounting: error getting root inode");
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 1b2fc902..bb31e953 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k)
}
}
+static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi)
+{
+ /* inode apts are stored with a +1 bias: 0 means "unset, use fs opt" */
+ return bi->bi_casefold
+ ? bi->bi_casefold - 1
+ : c->opts.casefold;
+}
+
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index f23c3bfe..73a0a42a 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -573,7 +573,6 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
- int orig_error = rbio->ret;
struct btree_trans *trans = bch2_trans_get(c);
@@ -614,10 +613,11 @@ static void bch2_rbio_retry(struct work_struct *work)
if (ret) {
rbio->ret = ret;
rbio->bio.bi_status = BLK_STS_IOERR;
- } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace &&
- orig_error != -BCH_ERR_data_read_ptr_stale_race &&
- !failed.nr) {
+ }
+
+ if (failed.nr || ret) {
struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf,
@@ -625,9 +625,22 @@ static void bch2_rbio_retry(struct work_struct *work)
read_pos.offset << 9));
if (rbio->data_update)
prt_str(&buf, "(internal move) ");
- prt_str(&buf, "successful retry");
- bch_err_ratelimited(c, "%s", buf.buf);
+ prt_str(&buf, "data read error, ");
+ if (!ret)
+ prt_str(&buf, "successful retry");
+ else
+ prt_str(&buf, bch2_err_str(ret));
+ prt_newline(&buf);
+
+ if (!bkey_deleted(&sk.k->k)) {
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
+ prt_newline(&buf);
+ }
+
+ bch2_io_failures_to_text(&buf, c, &failed);
+
+ bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
@@ -662,27 +675,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio,
}
}
-static void bch2_read_io_err(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bio *bio = &rbio->bio;
- struct bch_fs *c = rbio->c;
- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- struct printbuf buf = PRINTBUF;
-
- bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
- prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));
-
- if (ca)
- bch_err_ratelimited(ca, "%s", buf.buf);
- else
- bch_err_ratelimited(c, "%s", buf.buf);
-
- printbuf_exit(&buf);
- bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
-}
-
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
struct bch_read_bio *rbio)
{
@@ -746,31 +738,6 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
__bch2_rbio_narrow_crcs(trans, rbio));
}
-static void bch2_read_csum_err(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bch_fs *c = rbio->c;
- struct bio *src = &rbio->bio;
- struct bch_extent_crc_unpacked crc = rbio->pick.crc;
- struct nonce nonce = extent_nonce(rbio->version, crc);
- struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
- struct printbuf buf = PRINTBUF;
-
- bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
- prt_str(&buf, "data ");
- bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
-
- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- if (ca)
- bch_err_ratelimited(ca, "%s", buf.buf);
- else
- bch_err_ratelimited(c, "%s", buf.buf);
-
- bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
- printbuf_exit(&buf);
-}
-
static void bch2_read_decompress_err(struct work_struct *work)
{
struct bch_read_bio *rbio =
@@ -931,7 +898,7 @@ out:
memalloc_nofs_restore(nofs_flags);
return;
csum_err:
- bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
goto out;
decompression_err:
bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
@@ -957,7 +924,7 @@ static void bch2_read_endio(struct bio *bio)
rbio->bio.bi_end_io = rbio->end_io;
if (unlikely(bio->bi_status)) {
- bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
return;
}
@@ -1289,14 +1256,6 @@ retry_pick:
if (likely(!rbio->pick.do_ec_reconstruct)) {
if (unlikely(!rbio->have_ioref)) {
- struct printbuf buf = PRINTBUF;
- bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
- prt_printf(&buf, "no device to read from:\n ");
- bch2_bkey_val_to_text(&buf, c, k);
-
- bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
-
bch2_rbio_error(rbio,
-BCH_ERR_data_read_retry_device_offline,
BLK_STS_IOERR);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index 66bfb95f..c3a32403 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -214,18 +214,20 @@ void bch2_journal_space_available(struct journal *j)
j->can_discard = can_discard;
if (nr_online < metadata_replicas_required(c)) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
- prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
- "rw journal devs:", nr_online, metadata_replicas_required(c));
-
- rcu_read_lock();
- for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
- prt_printf(&buf, " %s", ca->name);
- rcu_read_unlock();
-
- bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
+ if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) {
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+ prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
+ "rw journal devs:", nr_online, metadata_replicas_required(c));
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
+ prt_printf(&buf, " %s", ca->name);
+ rcu_read_unlock();
+
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
ret = -BCH_ERR_insufficient_journal_devices;
goto out;
}
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index f605bd01..532566ea 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -675,7 +675,7 @@ root_err:
if (ret)
break;
- if (bkey_ge(bkey_start_pos(k.k), end))
+ if (bkey_gt(bkey_start_pos(k.k), end))
break;
if (ctxt->stats)
@@ -750,7 +750,8 @@ next:
if (ctxt->stats)
atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
- bch2_btree_iter_advance(trans, &iter);
+ if (!bch2_btree_iter_advance(trans, &iter))
+ break;
}
out:
bch2_trans_iter_exit(trans, &reflink_iter);
diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c
index 46f3c8b1..52c58c6d 100644
--- a/libbcachefs/namei.c
+++ b/libbcachefs/namei.c
@@ -343,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
bool ret = false;
for (id = 0; id < Inode_opt_nr; id++) {
+ if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold)
+ continue;
+
/* Skip attributes that were explicitly set on this inode */
if (dst_u->bi_fields_set & (1 << id))
continue;
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index e36fcab9..6aae686f 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -33,7 +33,9 @@
#include <linux/sort.h>
#include <linux/stat.h>
-int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_lost_data(struct bch_fs *c,
+ struct printbuf *msg,
+ enum btree_id btree)
{
u64 b = BIT_ULL(btree);
int ret = 0;
@@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (!(c->sb.btrees_lost_data & b)) {
- struct printbuf buf = PRINTBUF;
- bch2_btree_id_to_text(&buf, btree);
- bch_err(c, "flagging btree %s lost data", buf.buf);
- printbuf_exit(&buf);
+ prt_printf(msg, "flagging btree ");
+ bch2_btree_id_to_text(msg, btree);
+ prt_printf(msg, " lost data\n");
+
ext->btrees_lost_data |= cpu_to_le64(b);
}
/* Once we have runtime self healing for topology errors we won't need this: */
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret;
/* Btree node accounting will be off: */
__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;
#ifdef CONFIG_BCACHEFS_DEBUG
/*
* These are much more minor, and don't need to be corrected right away,
* but in debug mode we want the next fsck run to be clean:
*/
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret;
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
#endif
switch (btree) {
case BTREE_ID_alloc:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
__set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
__set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
@@ -77,26 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
goto out;
case BTREE_ID_backpointers:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
goto out;
case BTREE_ID_need_discard:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_freespace:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_bucket_gens:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_lru:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_accounting:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ goto out;
+ case BTREE_ID_snapshots:
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
goto out;
default:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
goto out;
}
out:
@@ -583,9 +589,6 @@ static int read_btree_roots(struct bch_fs *c)
buf.buf, bch2_err_str(ret))) {
if (btree_id_is_alloc(i))
r->error = 0;
-
- ret = bch2_btree_lost_data(c, i);
- BUG_ON(ret);
}
}
@@ -734,6 +737,11 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.read_only = true;
}
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+ bch_info(c, "filesystem is an unresized image file, mounting ro");
+ c->opts.read_only = true;
+ }
+
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
@@ -949,8 +957,6 @@ use_clean:
set_bit(BCH_FS_btree_running, &c->flags);
ret = bch2_sb_set_upgrade_extra(c);
-
- ret = bch2_fs_resize_on_mount(c);
if (ret)
goto err;
diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h
index d858ba67..c023f52f 100644
--- a/libbcachefs/recovery.h
+++ b/libbcachefs/recovery.h
@@ -2,7 +2,7 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
-int bch2_btree_lost_data(struct bch_fs *, enum btree_id);
+int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id);
void bch2_reconstruct_alloc(struct bch_fs *);
int bch2_journal_replay(struct bch_fs *);
diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c
index 87150dd3..e14aca00 100644
--- a/libbcachefs/recovery_passes.c
+++ b/libbcachefs/recovery_passes.c
@@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
if (print)
- prt_printf(out, "need recovery pass %s (%u), but already rw",
+ prt_printf(out, "need recovery pass %s (%u), but already rw\n",
bch2_recovery_passes[pass], pass);
return -BCH_ERR_cannot_rewind_recovery;
}
if (print)
- prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)",
+ prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n",
bch2_recovery_passes[pass], pass,
bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
@@ -162,7 +162,7 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
}
}
-int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c,
+static int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c,
struct printbuf *out,
enum bch_recovery_pass pass)
{
@@ -193,32 +193,30 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
return ret;
}
-int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
- enum bch_recovery_pass pass)
+int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+ struct printbuf *out,
+ enum bch_recovery_pass pass)
{
lockdep_assert_held(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
- return bch2_run_explicit_recovery_pass(c, pass);
+ return bch2_run_explicit_recovery_pass_printbuf(c, out, pass);
}
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+ struct printbuf *out,
enum bch_recovery_pass pass)
{
- enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+ if (c->sb.recovery_passes_required & BIT_ULL(pass))
+ return 0;
mutex_lock(&c->sb_lock);
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
-
- if (!test_bit_le64(s, ext->recovery_passes_required)) {
- __set_bit_le64(s, ext->recovery_passes_required);
- bch2_write_super(c);
- }
+ int ret = __bch2_run_explicit_recovery_pass_persistent(c, out, pass);
mutex_unlock(&c->sb_lock);
- return bch2_run_explicit_recovery_pass(c, pass);
+ return ret;
}
static void bch2_clear_recovery_pass_required(struct bch_fs *c,
diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/recovery_passes.h
index e19a8aab..f33dd005 100644
--- a/libbcachefs/recovery_passes.h
+++ b/libbcachefs/recovery_passes.h
@@ -8,12 +8,12 @@ u64 bch2_recovery_passes_from_stable(u64 v);
u64 bch2_fsck_recovery_passes(void);
-int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *,
- struct printbuf *,
- enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
-int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass);
-int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+
+int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
+ enum bch_recovery_pass);
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
+ enum bch_recovery_pass);
int bch2_run_online_recovery_passes(struct bch_fs *);
int bch2_run_recovery_passes(struct bch_fs *);
diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c
index 6a16767e..296c6c92 100644
--- a/libbcachefs/sb-downgrade.c
+++ b/libbcachefs/sb-downgrade.c
@@ -20,6 +20,10 @@
* x(version, recovery_passes, errors...)
*/
#define UPGRADE_TABLE() \
+ x(snapshot_2, \
+ RECOVERY_PASS_ALL_FSCK, \
+ BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \
+ BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \
x(backpointers, \
RECOVERY_PASS_ALL_FSCK) \
x(inode_v3, \
diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h
index 2e1d487a..98c89852 100644
--- a/libbcachefs/sb-errors_format.h
+++ b/libbcachefs/sb-errors_format.h
@@ -46,7 +46,7 @@ enum bch_fsck_flags {
x(btree_node_unsupported_version, 34, 0) \
x(btree_node_bset_older_than_sb_min, 35, 0) \
x(btree_node_bset_newer_than_sb, 36, 0) \
- x(btree_node_data_missing, 37, 0) \
+ x(btree_node_data_missing, 37, FSCK_AUTOFIX) \
x(btree_node_bset_after_end, 38, 0) \
x(btree_node_replicas_sectors_written_mismatch, 39, 0) \
x(btree_node_replicas_data_mismatch, 40, 0) \
@@ -205,9 +205,9 @@ enum bch_fsck_flags {
x(snapshot_bad_depth, 184, 0) \
x(snapshot_bad_skiplist, 185, 0) \
x(subvol_pos_bad, 186, 0) \
- x(subvol_not_master_and_not_snapshot, 187, 0) \
+ x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \
x(subvol_to_missing_root, 188, 0) \
- x(subvol_root_wrong_bi_subvol, 189, 0) \
+ x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \
x(bkey_in_missing_snapshot, 190, 0) \
x(inode_pos_inode_nonzero, 191, 0) \
x(inode_pos_blockdev_range, 192, 0) \
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index b2f9cb2d..e810d85c 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -20,7 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf);
- int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+ int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf,
BCH_RECOVERY_PASS_check_allocations);
if (print)
@@ -35,9 +35,11 @@ void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
}
-void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
+void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)
{
- bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset);
+ bch2_fs_inconsistent(ca->fs,
+ "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)",
+ bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets);
}
#define x(t, n, ...) [n] = #t,
diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h
index 0337e34d..c9cb8f76 100644
--- a/libbcachefs/sb-members.h
+++ b/libbcachefs/sb-members.h
@@ -258,20 +258,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev)
static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket)
{
struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode);
- if (ca && !bucket_valid(ca, bucket.offset)) {
+ if (ca && unlikely(!bucket_valid(ca, bucket.offset))) {
bch2_dev_put(ca);
ca = NULL;
}
return ca;
}
-void bch2_dev_bucket_missing(struct bch_fs *, struct bpos);
+void bch2_dev_bucket_missing(struct bch_dev *, u64);
static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket)
{
- struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket);
- if (!ca)
- bch2_dev_bucket_missing(c, bucket);
+ struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode);
+ if (ca && unlikely(!bucket_valid(ca, bucket.offset))) {
+ bch2_dev_bucket_missing(ca, bucket.offset);
+ bch2_dev_put(ca);
+ ca = NULL;
+ }
return ca;
}
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 14ea09cc..94cf60f7 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -1743,10 +1743,6 @@ int bch2_snapshots_read(struct bch_fs *c)
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
test_bit(BCH_FS_may_go_rw, &c->flags));
- if (bch2_err_matches(ret, EIO) ||
- (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
- ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
-
return ret;
}
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index a67ee4ad..0c1a0053 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
struct bch_hash_info {
u8 type;
- struct unicode_map *cf_encoding;
+ struct unicode_map *cf_encoding;
/*
* For crc32 or crc64 string hashes the first key value of
* the siphash_key (k0) is used as the key.
@@ -44,11 +44,10 @@ struct bch_hash_info {
static inline struct bch_hash_info
bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
{
- /* XXX ick */
struct bch_hash_info info = {
.type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
- .cf_encoding = bi->bi_casefold ? c->cf_encoding : NULL,
+ .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 39376c87..51ab2ee1 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -23,7 +23,7 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
prt_printf(&buf, "missing subvolume %u", subvolid);
bool print = bch2_count_fsck_err(c, subvol_missing, &buf);
- int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+ int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf,
BCH_RECOVERY_PASS_check_inodes);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
@@ -62,8 +62,8 @@ static int check_subvol(struct btree_trans *trans,
ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
if (bch2_err_matches(ret, ENOENT))
- bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
- k.k->p.offset, snapid);
+ return bch2_run_explicit_recovery_pass(c,
+ BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
if (ret)
return ret;
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index d53cbc5f..8730d2e7 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -623,6 +623,9 @@ static void bch2_sb_update(struct bch_fs *c)
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
if (ext) {
+ c->sb.recovery_passes_required =
+ bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index f861583c..81a7e703 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -214,6 +214,7 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
static int bch2_fs_init_rw(struct bch_fs *);
+static int bch2_fs_resize_on_mount(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@@ -567,6 +568,10 @@ static void __bch2_fs_free(struct bch_fs *c)
for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
+#ifdef CONFIG_UNICODE
+ utf8_unload(c->cf_encoding);
+#endif
+
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
bch2_free_fsck_errs(c);
@@ -898,25 +903,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
if (ret)
goto err;
-#ifdef CONFIG_UNICODE
- /* Default encoding until we can potentially have more as an option. */
- c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
- if (IS_ERR(c->cf_encoding)) {
- printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
- ret = -EINVAL;
- goto err;
- }
-#else
- if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
- printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
- ret = -EINVAL;
- goto err;
- }
-#endif
-
/* Compat: */
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
@@ -1002,6 +988,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
if (ret)
goto err;
+#ifdef CONFIG_UNICODE
+ /* Default encoding until we can potentially have more as an option. */
+ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
+ if (IS_ERR(c->cf_encoding)) {
+ printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+ ret = -EINVAL;
+ goto err;
+ }
+ bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+#else
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
+ printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
+ ret = -EINVAL;
+ goto err;
+ }
+#endif
+
for (i = 0; i < c->sb.nr_devices; i++) {
if (!bch2_member_exists(c->disk_sb.sb, i))
continue;
@@ -1070,6 +1079,40 @@ static void print_mount_opts(struct bch_fs *c)
printbuf_exit(&p);
}
+static bool bch2_fs_may_start(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned flags = 0;
+
+ switch (c->opts.degraded) {
+ case BCH_DEGRADED_very:
+ flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
+ break;
+ case BCH_DEGRADED_yes:
+ flags |= BCH_FORCE_IF_DEGRADED;
+ break;
+ default:
+ mutex_lock(&c->sb_lock);
+ for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+ if (!bch2_member_exists(c->disk_sb.sb, i))
+ continue;
+
+ ca = bch2_dev_locked(c, i);
+
+ if (!bch2_dev_is_online(ca) &&
+ (ca->mi.state == BCH_MEMBER_STATE_rw ||
+ ca->mi.state == BCH_MEMBER_STATE_ro)) {
+ mutex_unlock(&c->sb_lock);
+ return false;
+ }
+ }
+ mutex_unlock(&c->sb_lock);
+ break;
+ }
+
+ return bch2_have_enough_devs(c, c->online_devs, flags, true);
+}
+
int bch2_fs_start(struct bch_fs *c)
{
time64_t now = ktime_get_real_seconds();
@@ -1077,6 +1120,9 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
+ if (!bch2_fs_may_start(c))
+ return -BCH_ERR_insufficient_devices_to_start;
+
down_write(&c->state_lock);
mutex_lock(&c->sb_lock);
@@ -1106,6 +1152,12 @@ int bch2_fs_start(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
+ ret = bch2_fs_resize_on_mount(c);
+ if (ret) {
+ up_write(&c->state_lock);
+ goto err;
+ }
+
rcu_read_lock();
for_each_online_member_rcu(c, ca)
if (ca->mi.state == BCH_MEMBER_STATE_rw)
@@ -1593,40 +1645,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
}
}
-static bool bch2_fs_may_start(struct bch_fs *c)
-{
- struct bch_dev *ca;
- unsigned flags = 0;
-
- switch (c->opts.degraded) {
- case BCH_DEGRADED_very:
- flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
- break;
- case BCH_DEGRADED_yes:
- flags |= BCH_FORCE_IF_DEGRADED;
- break;
- default:
- mutex_lock(&c->sb_lock);
- for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
- if (!bch2_member_exists(c->disk_sb.sb, i))
- continue;
-
- ca = bch2_dev_locked(c, i);
-
- if (!bch2_dev_is_online(ca) &&
- (ca->mi.state == BCH_MEMBER_STATE_rw ||
- ca->mi.state == BCH_MEMBER_STATE_ro)) {
- mutex_unlock(&c->sb_lock);
- return false;
- }
- }
- mutex_unlock(&c->sb_lock);
- break;
- }
-
- return bch2_have_enough_devs(c, c->online_devs, flags, true);
-}
-
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
{
bch2_dev_io_ref_stop(ca, WRITE);
@@ -2096,10 +2114,8 @@ err:
return ret;
}
-int bch2_fs_resize_on_mount(struct bch_fs *c)
+static int bch2_fs_resize_on_mount(struct bch_fs *c)
{
- down_write(&c->state_lock);
-
for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
u64 old_nbuckets = ca->mi.nbuckets;
u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
@@ -2138,9 +2154,6 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
}
}
}
-
- bch2_recalc_capacity(c);
- up_write(&c->state_lock);
return 0;
}
@@ -2331,11 +2344,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
}
up_write(&c->state_lock);
- if (!bch2_fs_may_start(c)) {
- ret = -BCH_ERR_insufficient_devices_to_start;
- goto err_print;
- }
-
if (!c->opts.nostart) {
ret = bch2_fs_start(c);
if (ret)
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
index dbf59547..a0bbb9f4 100644
--- a/libbcachefs/super.h
+++ b/libbcachefs/super.h
@@ -29,7 +29,6 @@ int bch2_dev_add(struct bch_fs *, const char *);
int bch2_dev_online(struct bch_fs *, const char *);
int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
-int bch2_fs_resize_on_mount(struct bch_fs *);
struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *);
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index c265b102..782a05fe 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c
@@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
*/
static int test_peek_end(struct bch_fs *c, u64 nr)
{
+ delete_test_keys(c);
+
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
@@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
static int test_peek_end_extents(struct bch_fs *c, u64 nr)
{
+ delete_test_keys(c);
+
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 7e6ebe8c..1cff407c 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -252,6 +252,16 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v)
bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}
+static bool string_is_spaces(const char *str)
+{
+ while (*str) {
+ if (*str != ' ')
+ return false;
+ str++;
+ }
+ return true;
+}
+
void bch2_print_string_as_lines(const char *prefix, const char *lines,
bool nonblocking)
{
@@ -272,6 +282,9 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines,
while (*lines) {
p = strchrnul(lines, '\n');
+ if (!*p && string_is_spaces(lines))
+ break;
+
printk("%s%.*s\n", prefix, (int) (p - lines), lines);
if (!*p)
break;