diff options
Diffstat (limited to 'libbcachefs')
29 files changed, 320 insertions, 133 deletions
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 002e3853..81e2ae4b 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -2442,8 +2442,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) * We clear the LRU and need_discard btrees first so that we don't race * with bch2_do_invalidates() and bch2_do_discards() */ - ret = bch2_dev_remove_stripes(c, ca->dev_idx) ?: - bch2_btree_delete_range(c, BTREE_ID_lru, start, end, + ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end, BTREE_TRIGGER_norun, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end, BTREE_TRIGGER_norun, NULL) ?: diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index c17c5733..b50846da 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -1336,11 +1336,31 @@ alloc_done: req->wp->sectors_free = UINT_MAX; - open_bucket_for_each(c, &req->wp->ptrs, ob, i) + open_bucket_for_each(c, &req->wp->ptrs, ob, i) { + /* + * Ensure proper write alignment - either due to misaligned + * bucket sizes (from buggy bcachefs-tools), or writes that mix + * logical/physical alignment: + */ + struct bch_dev *ca = ob_dev(c, ob); + u64 offset = bucket_to_sector(ca, ob->bucket) + + ca->mi.bucket_size - + ob->sectors_free; + unsigned align = round_up(offset, block_sectors(c)) - offset; + + ob->sectors_free = max_t(int, 0, ob->sectors_free - align); + req->wp->sectors_free = min(req->wp->sectors_free, ob->sectors_free); + } req->wp->sectors_free = rounddown(req->wp->sectors_free, block_sectors(c)); + /* Did alignment use up space in an open_bucket? 
*/ + if (unlikely(!req->wp->sectors_free)) { + bch2_alloc_sectors_done(c, req->wp); + goto retry; + } + BUG_ON(!req->wp->sectors_free || req->wp->sectors_free == UINT_MAX); return 0; @@ -1368,20 +1388,6 @@ err: return ret; } -struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) -{ - struct bch_dev *ca = ob_dev(c, ob); - - return (struct bch_extent_ptr) { - .type = 1 << BCH_EXTENT_ENTRY_ptr, - .gen = ob->gen, - .dev = ob->dev, - .offset = bucket_to_sector(ca, ob->bucket) + - ca->mi.bucket_size - - ob->sectors_free, - }; -} - void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, struct bkey_i *k, unsigned sectors, bool cached) diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h index 19220341..2e01c7b6 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc_foreground.h @@ -3,6 +3,7 @@ #define _BCACHEFS_ALLOC_FOREGROUND_H #include "bcachefs.h" +#include "buckets.h" #include "alloc_types.h" #include "extents.h" #include "io_write_types.h" @@ -233,7 +234,19 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *, struct closure *, struct write_point **); -struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *, struct open_bucket *); +static inline struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) +{ + struct bch_dev *ca = ob_dev(c, ob); + + return (struct bch_extent_ptr) { + .type = 1 << BCH_EXTENT_ENTRY_ptr, + .gen = ob->gen, + .dev = ob->dev, + .offset = bucket_to_sector(ca, ob->bucket) + + ca->mi.bucket_size - + ob->sectors_free, + }; +} /* * Append pointers to the space we just allocated to @k, and mark @sectors space diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index baaf9786..a483d440 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -871,6 +871,7 @@ LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14); LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20); 
LE64_BITMASK(BCH_SB_DEGRADED_ACTION, struct bch_sb, flags[6], 20, 22); LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23); +LE64_BITMASK(BCH_SB_REBALANCE_AC_ONLY, struct bch_sb, flags[6], 23, 24); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index aec397b4..e079e12a 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -43,6 +43,7 @@ void bch2_btree_node_io_unlock(struct btree *b) clear_btree_node_write_in_flight_inner(b); clear_btree_node_write_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } @@ -1442,6 +1443,7 @@ start: bio_put(&rb->bio); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1648,6 +1650,7 @@ fsck_err: printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1777,6 +1780,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, set_btree_node_read_error(b); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); return; @@ -2120,8 +2124,10 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); - else + else { + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); + } } static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time) @@ -2236,6 +2242,7 @@ static void btree_node_write_endio(struct bio *bio) } clear_btree_node_write_in_flight_inner(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); queue_work(c->btree_write_complete_wq, &wb->work); diff --git 
a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 8d75b27a..af1532de 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -44,6 +44,7 @@ static inline void bucket_unlock(struct bucket *b) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); } diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 417f89dd..edfbb9d3 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -2112,23 +2112,17 @@ err: /* device removal */ -static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_s_c k_a) +int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + unsigned dev_idx, + unsigned flags) { - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert); - - if (!a->stripe) + if (k.k->type != KEY_TYPE_stripe) return 0; - if (a->stripe_sectors) { - bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data"); - return -BCH_ERR_invalidate_stripe_to_dev; - } - - struct btree_iter iter; struct bkey_i_stripe *s = - bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe), - BTREE_ITER_slots, stripe); + bch2_bkey_make_mut_typed(trans, iter, &k, 0, stripe); int ret = PTR_ERR_OR_ZERO(s); if (ret) return ret; @@ -2145,36 +2139,79 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ acc.replicas.data_type = BCH_DATA_user; ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false); if (ret) - goto err; + return ret; struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i)); - bkey_for_each_ptr(ptrs, ptr) - if (ptr->dev == k_a.k->p.inode) + + /* XXX: how much redundancy do we still have? 
check degraded flags */ + + unsigned nr_good = 0; + + rcu_read_lock(); + bkey_for_each_ptr(ptrs, ptr) { + if (ptr->dev == dev_idx) ptr->dev = BCH_SB_MEMBER_INVALID; + struct bch_dev *ca = bch2_dev_rcu(trans->c, ptr->dev); + nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed; + } + rcu_read_unlock(); + + if (nr_good < s->v.nr_blocks && !(flags & BCH_FORCE_IF_DATA_DEGRADED)) + return -BCH_ERR_remove_would_lose_data; + + unsigned nr_data = s->v.nr_blocks - s->v.nr_redundant; + + if (nr_good < nr_data && !(flags & BCH_FORCE_IF_DATA_LOST)) + return -BCH_ERR_remove_would_lose_data; + sectors = -sectors; memset(&acc, 0, sizeof(acc)); acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; - ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false); + return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false); +} + +static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, struct bkey_s_c k_a, + unsigned flags) +{ + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert); + + if (!a->stripe) + return 0; + + if (a->stripe_sectors) { + bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data"); + return -BCH_ERR_invalidate_stripe_to_dev; + } + + struct btree_iter iter; + struct bkey_s_c_stripe s = + bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe), + BTREE_ITER_slots, stripe); + int ret = bkey_err(s); if (ret) - goto err; -err: + return ret; + + ret = bch2_invalidate_stripe_to_dev(trans, &iter, s.s_c, k_a.k->p.inode, flags); bch2_trans_iter_exit(trans, &iter); return ret; } -int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) +int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags) { - return bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, 
U64_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ - bch2_invalidate_stripe_to_dev(trans, k); + bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags); }))); + bch_err_fn(c, ret); + return ret; } /* startup/shutdown */ diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 6780292d..548048ad 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -160,6 +160,7 @@ static inline void gc_stripe_unlock(struct gc_stripe *s) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &s->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &s->lock, BUCKET_LOCK_BITNR); } @@ -287,7 +288,9 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, } } -int bch2_dev_remove_stripes(struct bch_fs *, unsigned); +int bch2_invalidate_stripe_to_dev(struct btree_trans *, struct btree_iter *, + struct bkey_s_c, unsigned, unsigned); +int bch2_dev_remove_stripes(struct bch_fs *, unsigned, unsigned); void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_fs_ec_stop(struct bch_fs *); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 13e28f9a..4aac0182 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -275,7 +275,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ - x(EIO, journal_shutdown) \ + x(EROFS, journal_shutdown) \ x(EIO, journal_flush_err) \ x(EIO, journal_write_err) \ x(EIO, btree_node_read_err) \ diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 2d41260a..d3af841e 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -1140,6 +1140,7 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c, union bch_extent_entry *entry; struct extent_ptr_decoded p; bool have_cached_ptr; + unsigned drop_dev = ptr->dev; rcu_read_lock(); restart_drop_ptrs: @@ -1157,6 +1158,7 @@ restart_drop_ptrs: if (p.ptr.cached) { if (have_cached_ptr || 
!want_cached_ptr(c, opts, &p.ptr)) { bch2_bkey_drop_ptr_noerror(k, &entry->ptr); + ptr = NULL; goto restart_drop_ptrs; } @@ -1164,6 +1166,11 @@ restart_drop_ptrs: } } + if (!ptr) + bkey_for_each_ptr(ptrs, ptr2) + if (ptr2->dev == drop_dev) + ptr = ptr2; + if (have_cached_ptr || !want_cached_ptr(c, opts, ptr)) goto drop; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index f1472e91..88e88de3 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -2503,10 +2503,9 @@ static int bch2_fs_get_tree(struct fs_context *fc) bch2_opts_apply(&c->opts, opts); - /* - * need to initialise sb and set c->vfs_sb _before_ starting fs, - * for blk_holder_ops - */ + ret = bch2_fs_start(c); + if (ret) + goto err_stop_fs; sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); @@ -2574,10 +2573,6 @@ got_sb: sb->s_shrink->seeks = 0; - ret = bch2_fs_start(c); - if (ret) - goto err_put_super; - #ifdef CONFIG_UNICODE sb->s_encoding = c->cf_encoding; #endif diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index add23676..fb57699a 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1471,7 +1471,9 @@ static int check_key_has_inode(struct btree_trans *trans, if (k.k->type == KEY_TYPE_whiteout) goto out; - if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { + bool have_inode = i && !i->whiteout; + + if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret) @@ -1482,14 +1484,14 @@ static int check_key_has_inode(struct btree_trans *trans, goto err; } - if (fsck_err_on(!i, + if (fsck_err_on(!have_inode, trans, key_in_missing_inode, "key in missing inode:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) goto delete; - if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), + if (fsck_err_on(have_inode && 
!btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), trans, key_in_wrong_inode_type, "key for wrong inode mode %o:\n%s", i->inode.bi_mode, @@ -1877,7 +1879,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); inode->inodes.data && i >= inode->inodes.data; --i) { - if (i->inode.bi_snapshot > k.k->p.snapshot || + if (i->whiteout || + i->inode.bi_snapshot > k.k->p.snapshot || !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) continue; @@ -2188,7 +2191,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - if (!i) + if (!i || i->whiteout) goto out; if (dir->first_this_inode) @@ -2328,7 +2331,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, if (ret) return ret; - if (!i) + if (!i || i->whiteout) return 0; if (inode->first_this_inode) diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index e91c9848..c593d77d 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -19,6 +19,7 @@ #include <linux/ioprio.h> #include <linux/string_choices.h> +#include <linux/sched/sysctl.h> void bch2_journal_pos_from_member_info_set(struct bch_fs *c) { @@ -1263,7 +1264,8 @@ int bch2_journal_read(struct bch_fs *c, degraded = true; } - closure_sync(&jlist.cl); + while (closure_sync_timeout(&jlist.cl, sysctl_hung_task_timeout_secs * HZ / 2)) + ; if (jlist.ret) return jlist.ret; @@ -2045,15 +2047,6 @@ CLOSURE_CALLBACK(bch2_journal_write) j->write_start_time = local_clock(); - mutex_lock(&j->buf_lock); - journal_buf_realloc(j, w); - - ret = bch2_journal_write_prep(j, w); - mutex_unlock(&j->buf_lock); - - if (unlikely(ret)) - goto err; - spin_lock(&j->lock); if (nr_rw_members > 1) w->separate_flush = true; @@ -2064,6 +2057,15 @@ CLOSURE_CALLBACK(bch2_journal_write) if (unlikely(ret)) goto err; + mutex_lock(&j->buf_lock); + journal_buf_realloc(j, w); + + ret = 
bch2_journal_write_prep(j, w); + mutex_unlock(&j->buf_lock); + + if (unlikely(ret)) + goto err; + unsigned replicas_allocated = 0; while (1) { ret = journal_write_alloc(j, w, &replicas_allocated); diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 17beba0e..fb4e850c 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -268,10 +268,9 @@ out: static bool should_discard_bucket(struct journal *j, struct journal_device *ja) { - bool ret; - spin_lock(&j->lock); - ret = ja->discard_idx != ja->dirty_idx_ondisk; + bool ret = bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < 4 && + ja->discard_idx != ja->dirty_idx_ondisk; spin_unlock(&j->lock); return ret; diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index f431586a..bb7a9227 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -10,6 +10,7 @@ #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "buckets.h" +#include "ec.h" #include "errcode.h" #include "extents.h" #include "io_write.h" @@ -210,19 +211,28 @@ static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx, unsigned flags) { struct btree_iter iter; - struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed); + struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, + last_flushed); int ret = bkey_err(k); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) return 0; if (ret) return ret; - if (!bch2_bkey_has_device_c(k, dev_idx)) + if (!k.k || !bch2_bkey_has_device_c(k, dev_idx)) goto out; - ret = bkey_is_btree_ptr(k.k) - ? 
bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags) - : bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags); + /* + * XXX: pass flags arg to invalidate_stripe_to_dev and handle it + * properly + */ + + if (bkey_is_btree_ptr(k.k)) + ret = bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags); + else if (k.k->type == KEY_TYPE_stripe) + ret = bch2_invalidate_stripe_to_dev(trans, &iter, k, dev_idx, flags); + else + ret = bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags); out: bch2_trans_iter_exit(trans, &iter); return ret; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 532566ea..ff56d888 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -854,7 +854,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, goto err; ret = bch2_btree_write_buffer_tryflush(trans); - bch_err_msg(c, ret, "flushing btree write buffer"); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "flushing btree write buffer"); if (ret) goto err; diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index b8cd0b04..2a026062 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -490,6 +490,17 @@ enum fsck_err_opts { BCH2_NO_SB_OPT, true, \ NULL, "Enable rebalance: disable for debugging, or to\n"\ "quiet the system when doing performance testing\n")\ + x(rebalance_on_ac_only, u8, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH_SB_REBALANCE_AC_ONLY, false, \ + NULL, "Enable rebalance while on mains power only\n") \ + x(auto_snapshot_deletion, u8, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, true, \ + NULL, "Enable automatic snapshot deletion: disable for debugging, or to\n"\ + "quiet the system when doing performance testing\n")\ x(no_data_io, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index a94f66ff..0546ed35 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -518,6 +518,13 @@ static void rebalance_wait(struct 
bch_fs *c) bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT); } +static bool bch2_rebalance_enabled(struct bch_fs *c) +{ + return c->opts.rebalance_enabled && + !(c->opts.rebalance_on_ac_only && + c->rebalance.on_battery); +} + static int do_rebalance(struct moving_context *ctxt) { struct btree_trans *trans = ctxt->trans; @@ -537,9 +544,9 @@ static int do_rebalance(struct moving_context *ctxt) BTREE_ITER_all_snapshots); while (!bch2_move_ratelimit(ctxt)) { - if (!c->opts.rebalance_enabled) { + if (!bch2_rebalance_enabled(c)) { bch2_moving_ctxt_flush_all(ctxt); - kthread_wait_freezable(c->opts.rebalance_enabled || + kthread_wait_freezable(bch2_rebalance_enabled(c) || kthread_should_stop()); } @@ -714,9 +721,42 @@ int bch2_rebalance_start(struct bch_fs *c) return 0; } -void bch2_fs_rebalance_init(struct bch_fs *c) +#ifdef CONFIG_POWER_SUPPLY +#include <linux/power_supply.h> + +static int bch2_rebalance_power_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct bch_fs *c = container_of(nb, struct bch_fs, rebalance.power_notifier); + + c->rebalance.on_battery = !power_supply_is_system_supplied(); + bch2_rebalance_wakeup(c); + return NOTIFY_OK; +} +#endif + +void bch2_fs_rebalance_exit(struct bch_fs *c) { - bch2_pd_controller_init(&c->rebalance.pd); +#ifdef CONFIG_POWER_SUPPLY + power_supply_unreg_notifier(&c->rebalance.power_notifier); +#endif +} + +int bch2_fs_rebalance_init(struct bch_fs *c) +{ + struct bch_fs_rebalance *r = &c->rebalance; + + bch2_pd_controller_init(&r->pd); + +#ifdef CONFIG_POWER_SUPPLY + r->power_notifier.notifier_call = bch2_rebalance_power_notifier; + int ret = power_supply_reg_notifier(&r->power_notifier); + if (ret) + return ret; + + r->on_battery = !power_supply_is_system_supplied(); +#endif + return 0; } static int check_rebalance_work_one(struct btree_trans *trans, diff --git a/libbcachefs/rebalance.h b/libbcachefs/rebalance.h index b7c8c065..5d9214fe 100644 --- 
a/libbcachefs/rebalance.h +++ b/libbcachefs/rebalance.h @@ -52,7 +52,9 @@ void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *); void bch2_rebalance_stop(struct bch_fs *); int bch2_rebalance_start(struct bch_fs *); -void bch2_fs_rebalance_init(struct bch_fs *); + +void bch2_fs_rebalance_exit(struct bch_fs *); +int bch2_fs_rebalance_init(struct bch_fs *); int bch2_check_rebalance_work(struct bch_fs *); diff --git a/libbcachefs/rebalance_types.h b/libbcachefs/rebalance_types.h index fe5098c1..33d77286 100644 --- a/libbcachefs/rebalance_types.h +++ b/libbcachefs/rebalance_types.h @@ -30,6 +30,11 @@ struct bch_fs_rebalance { struct bbpos scan_start; struct bbpos scan_end; struct bch_move_stats scan_stats; + + bool on_battery; +#ifdef CONFIG_POWER_SUPPLY + struct notifier_block power_notifier; +#endif }; #endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index e5c68c2f..b9568a68 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -196,7 +196,7 @@ static int validate_member(struct printbuf *err, } if (BCH_MEMBER_FREESPACE_INITIALIZED(&m) && - sb->features[0] & BIT_ULL(BCH_FEATURE_no_alloc_info)) { + sb->features[0] & cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info))) { prt_printf(err, "device %u: freespace initialized but fs has no alloc info", i); return -BCH_ERR_invalid_sb_members; } diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index fc8fb076..05a23e95 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -211,9 +211,14 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); - prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u", - BCH_SNAPSHOT_SUBVOL(s.v), - BCH_SNAPSHOT_WILL_DELETE(s.v), + if (BCH_SNAPSHOT_SUBVOL(s.v)) + prt_str(out, "subvol "); + if (BCH_SNAPSHOT_WILL_DELETE(s.v)) + prt_str(out, "will_delete "); + if 
(BCH_SNAPSHOT_DELETED(s.v)) + prt_str(out, "deleted "); + + prt_printf(out, "parent %10u children %10u %10u subvol %u tree %u", le32_to_cpu(s.v->parent), le32_to_cpu(s.v->children[0]), le32_to_cpu(s.v->children[1]), @@ -1646,7 +1651,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s if (BCH_SNAPSHOT_DELETED(s.v)) return 0; - mutex_lock(&d->lock); + mutex_lock(&d->progress_lock); for (unsigned i = 0; i < 2; i++) { u32 child = le32_to_cpu(s.v->children[i]); @@ -1673,7 +1678,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s darray_push(&d->delete_interior, n); } } - mutex_unlock(&d->lock); + mutex_unlock(&d->progress_lock); return ret; } @@ -1759,24 +1764,31 @@ static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snap prt_printf(out, "deleting from trees"); darray_for_each(d->deleting_from_trees, i) prt_printf(out, " %u", *i); + prt_newline(out); prt_printf(out, "deleting leaves"); darray_for_each(d->delete_leaves, i) prt_printf(out, " %u", *i); + prt_newline(out); - prt_printf(out, " interior"); + prt_printf(out, "interior"); darray_for_each(d->delete_interior, i) prt_printf(out, " %u->%u", i->id, i->live_child); + prt_newline(out); } -int bch2_delete_dead_snapshots(struct bch_fs *c) +int __bch2_delete_dead_snapshots(struct bch_fs *c) { - if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) + struct snapshot_delete *d = &c->snapshot_delete; + int ret = 0; + + if (!mutex_trylock(&d->lock)) return 0; + if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) + goto out_unlock; + struct btree_trans *trans = bch2_trans_get(c); - struct snapshot_delete *d = &c->snapshot_delete; - int ret = 0; /* * For every snapshot node: If we have no live children and it's not @@ -1843,18 +1855,28 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } err: - mutex_lock(&d->lock); + mutex_lock(&d->progress_lock); darray_exit(&d->deleting_from_trees); 
darray_exit(&d->delete_interior); darray_exit(&d->delete_leaves); d->running = false; - mutex_unlock(&d->lock); + mutex_unlock(&d->progress_lock); bch2_trans_put(trans); +out_unlock: + mutex_unlock(&d->lock); if (!bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); return ret; } +int bch2_delete_dead_snapshots(struct bch_fs *c) +{ + if (!c->opts.auto_snapshot_deletion) + return 0; + + return __bch2_delete_dead_snapshots(c); +} + void bch2_delete_dead_snapshots_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work); @@ -1867,6 +1889,9 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) void bch2_delete_dead_snapshots_async(struct bch_fs *c) { + if (!c->opts.auto_snapshot_deletion) + return; + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots)) return; @@ -1885,12 +1910,11 @@ void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c) return; } - mutex_lock(&d->lock); + mutex_lock(&d->progress_lock); bch2_snapshot_delete_nodes_to_text(out, d); - prt_newline(out); - mutex_unlock(&d->lock); bch2_bbpos_to_text(out, d->pos); + mutex_unlock(&d->progress_lock); } int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, @@ -1972,5 +1996,6 @@ void bch2_fs_snapshots_init_early(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work); mutex_init(&c->snapshot_delete.lock); + mutex_init(&c->snapshot_delete.progress_lock); mutex_init(&c->snapshots_unlinked_lock); } diff --git a/libbcachefs/snapshot.h b/libbcachefs/snapshot.h index 69c484b7..63b9469e 100644 --- a/libbcachefs/snapshot.h +++ b/libbcachefs/snapshot.h @@ -273,6 +273,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans, return __bch2_key_has_snapshot_overwrites(trans, id, pos); } +int __bch2_delete_dead_snapshots(struct bch_fs *); int bch2_delete_dead_snapshots(struct bch_fs *); void bch2_delete_dead_snapshots_work(struct work_struct *); 
void bch2_delete_dead_snapshots_async(struct bch_fs *); diff --git a/libbcachefs/snapshot_types.h b/libbcachefs/snapshot_types.h index a64f4b94..0ab698f1 100644 --- a/libbcachefs/snapshot_types.h +++ b/libbcachefs/snapshot_types.h @@ -42,9 +42,10 @@ struct snapshot_interior_delete { typedef DARRAY(struct snapshot_interior_delete) interior_delete_list; struct snapshot_delete { + struct mutex lock; struct work_struct work; - struct mutex lock; + struct mutex progress_lock; snapshot_id_list deleting_from_trees; snapshot_id_list delete_leaves; interior_delete_list delete_interior; diff --git a/libbcachefs/str_hash.c b/libbcachefs/str_hash.c index a90bf7b8..55a3a116 100644 --- a/libbcachefs/str_hash.c +++ b/libbcachefs/str_hash.c @@ -157,6 +157,8 @@ static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans if (bkey_is_inode(k.k)) goto found; } + + /* This would've been caught by check_key_has_inode() */ bch_err(c, "%s(): inum %llu not found", __func__, inum); ret = -BCH_ERR_fsck_repair_unimplemented; goto err; @@ -166,20 +168,25 @@ found:; if (ret) goto err; - struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); - if (hash_info->type != hash2.type || - memcmp(&hash_info->siphash_key, &hash2.siphash_key, sizeof(hash2.siphash_key))) { + struct bch_hash_info hash_root = bch2_hash_info_init(c, &inode); + if (hash_info->type != hash_root.type || + memcmp(&hash_info->siphash_key, + &hash_root.siphash_key, + sizeof(hash_root.siphash_key))) { ret = repair_inode_hash_info(trans, &inode); if (!ret) { - bch_err(c, "inode hash info mismatch with root, but mismatch not found\n" - "%u %llx %llx\n" - "%u %llx %llx", - hash_info->type, - hash_info->siphash_key.k0, - hash_info->siphash_key.k1, - hash2.type, - hash2.siphash_key.k0, - hash2.siphash_key.k1); + struct printbuf buf = PRINTBUF; + prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n", inum); + + prt_printf(&buf, "root snapshot %u ", hash_root.inum_snapshot); + 
bch2_prt_str_hash_type(&buf, hash_root.type); + prt_printf(&buf, " %llx %llx\n", hash_root.siphash_key.k0, hash_root.siphash_key.k1); + + prt_printf(&buf, "vs snapshot %u ", hash_info->inum_snapshot); + bch2_prt_str_hash_type(&buf, hash_info->type); + prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); ret = -BCH_ERR_fsck_repair_unimplemented; } } diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 0c1a0053..ae3154fb 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -32,6 +32,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) } struct bch_hash_info { + u32 inum_snapshot; u8 type; struct unicode_map *cf_encoding; /* @@ -45,11 +46,12 @@ static inline struct bch_hash_info bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) { struct bch_hash_info info = { - .type = INODE_STR_HASH(bi), + .inum_snapshot = bi->bi_snapshot, + .type = INODE_STR_HASH(bi), #ifdef CONFIG_UNICODE - .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, + .cf_encoding = bch2_inode_casefold(c, bi) ? 
c->cf_encoding : NULL, #endif - .siphash_key = { .k0 = bi->bi_hash_seed } + .siphash_key = { .k0 = bi->bi_hash_seed } }; if (unlikely(info.type == BCH_STR_HASH_siphash_old)) { diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 18d8823c..1feb688f 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -574,35 +574,37 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); - bch2_fs_accounting_exit(c); - bch2_fs_async_obj_exit(c); - bch2_fs_sb_errors_exit(c); - bch2_fs_counters_exit(c); + bch2_fs_vfs_exit(c); bch2_fs_snapshots_exit(c); + bch2_fs_sb_errors_exit(c); + bch2_fs_replicas_exit(c); + bch2_fs_rebalance_exit(c); bch2_fs_quota_exit(c); + bch2_fs_nocow_locking_exit(c); + bch2_fs_journal_exit(&c->journal); bch2_fs_fs_io_direct_exit(c); bch2_fs_fs_io_buffered_exit(c); bch2_fs_fsio_exit(c); - bch2_fs_vfs_exit(c); - bch2_fs_ec_exit(c); - bch2_fs_encryption_exit(c); - bch2_fs_nocow_locking_exit(c); bch2_fs_io_write_exit(c); bch2_fs_io_read_exit(c); + bch2_fs_encryption_exit(c); + bch2_fs_ec_exit(c); + bch2_fs_counters_exit(c); + bch2_fs_compress_exit(c); + bch2_io_clock_exit(&c->io_clock[WRITE]); + bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_buckets_waiting_for_journal_exit(c); - bch2_fs_btree_interior_update_exit(c); + bch2_fs_btree_write_buffer_exit(c); bch2_fs_btree_key_cache_exit(&c->btree_key_cache); - bch2_fs_btree_cache_exit(c); bch2_fs_btree_iter_exit(c); - bch2_fs_replicas_exit(c); - bch2_fs_journal_exit(&c->journal); - bch2_io_clock_exit(&c->io_clock[WRITE]); - bch2_io_clock_exit(&c->io_clock[READ]); - bch2_fs_compress_exit(c); + bch2_fs_btree_interior_update_exit(c); + bch2_fs_btree_cache_exit(c); + bch2_fs_accounting_exit(c); + bch2_fs_async_obj_exit(c); bch2_journal_keys_put_initial(c); bch2_find_btree_nodes_exit(&c->found_btree_nodes); + BUG_ON(atomic_read(&c->journal_keys.ref)); - bch2_fs_btree_write_buffer_exit(c); 
percpu_free_rwsem(&c->mark_lock); if (c->online_reserved) { u64 v = percpu_u64_get(c->online_reserved); @@ -861,7 +863,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch2_fs_move_init(c); bch2_fs_nocow_locking_init_early(c); bch2_fs_quota_init(c); - bch2_fs_rebalance_init(c); bch2_fs_sb_errors_init_early(c); bch2_fs_snapshots_init_early(c); bch2_fs_subvolumes_init_early(c); @@ -983,6 +984,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch2_fs_fsio_init(c) ?: bch2_fs_fs_io_direct_init(c) ?: bch2_fs_io_read_init(c) ?: + bch2_fs_rebalance_init(c) ?: bch2_fs_sb_errors_init(c) ?: bch2_fs_vfs_init(c); if (ret) @@ -1741,11 +1743,12 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = fast_device_removal ? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags) - : bch2_dev_data_drop(c, ca->dev_idx, flags); + : (bch2_dev_data_drop(c, ca->dev_idx, flags) ?: + bch2_dev_remove_stripes(c, ca->dev_idx, flags)); if (ret) goto err; - /* Check if device still has data */ + /* Check if device still has data before blowing away alloc info */ struct bch_dev_usage usage = bch2_dev_usage_read(ca); for (unsigned i = 0; i < BCH_DATA_NR; i++) if (!data_type_is_empty(i) && @@ -2159,7 +2162,7 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) m->nbuckets = cpu_to_le64(new_nbuckets); SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); - c->disk_sb.sb->features[0] &= ~BIT_ULL(BCH_FEATURE_small_image); + c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); bch2_write_super(c); mutex_unlock(&c->sb_lock); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index eafaa2c8..11da5e54 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -150,6 +150,7 @@ write_attribute(trigger_btree_key_cache_shrink); write_attribute(trigger_btree_updates); write_attribute(trigger_freelist_wakeup); write_attribute(trigger_recalc_capacity); +write_attribute(trigger_delete_dead_snapshots); 
read_attribute(gc_gens_pos); __sysfs_attribute(read_fua_test, 0400); @@ -551,6 +552,9 @@ STORE(bch2_fs) up_read(&c->state_lock); } + if (attr == &sysfs_trigger_delete_dead_snapshots) + __bch2_delete_dead_snapshots(c); + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -680,6 +684,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_btree_updates, &sysfs_trigger_freelist_wakeup, &sysfs_trigger_recalc_capacity, + &sysfs_trigger_delete_dead_snapshots, &sysfs_gc_gens_pos, diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 14cb2c7d..25cf61eb 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -14,6 +14,7 @@ #include <linux/log2.h> #include <linux/percpu.h> #include <linux/preempt.h> +#include <linux/random.h> #include <linux/ratelimit.h> #include <linux/slab.h> #include <linux/vmalloc.h> |