diff options
Diffstat (limited to 'libbcachefs/recovery.c')
-rw-r--r-- | libbcachefs/recovery.c | 104 |
1 files changed, 71 insertions, 33 deletions
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 9c30500c..130274b1 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -98,6 +98,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans, unsigned update_flags = BTREE_TRIGGER_NORUN; int ret; + if (k->overwritten) + return 0; + + trans->journal_res.seq = k->journal_seq; + /* * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to * keep the key cache coherent with the underlying btree. Nothing @@ -139,27 +144,14 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) static int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; - struct journal_key **keys_sorted, *k; + DARRAY(struct journal_key *) keys_sorted = { 0 }; + struct journal_key **kp; struct journal *j = &c->journal; u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; - size_t i; + struct btree_trans *trans = bch2_trans_get(c); int ret; - move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); - keys->gap = keys->nr; - - keys_sorted = kvmalloc_array(keys->nr, sizeof(*keys_sorted), GFP_KERNEL); - if (!keys_sorted) - return -BCH_ERR_ENOMEM_journal_replay; - - for (i = 0; i < keys->nr; i++) - keys_sorted[i] = &keys->d[i]; - - sort(keys_sorted, keys->nr, - sizeof(keys_sorted[0]), - journal_sort_seq_cmp, NULL); - if (keys->nr) { ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", keys->nr, start_seq, end_seq); @@ -167,27 +159,61 @@ static int bch2_journal_replay(struct bch_fs *c) goto err; } - for (i = 0; i < keys->nr; i++) { - k = keys_sorted[i]; + /* + * First, attempt to replay keys in sorted order. This is more + * efficient, but some might fail if that would cause a journal + * deadlock. + */ + for (size_t i = 0; i < keys->nr; i++) { + cond_resched(); + + struct journal_key *k = keys->d + i; + + ret = commit_do(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_journal_reclaim| + (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), + bch2_journal_replay_key(trans, k)); + BUG_ON(!ret && !k->overwritten); + if (ret) { + ret = darray_push(&keys_sorted, k); + if (ret) + goto err; + } + } + /* + * Now, replay any remaining keys in the order in which they appear in + * the journal, unpinning those journal entries as we go: + */ + sort(keys_sorted.data, keys_sorted.nr, + sizeof(keys_sorted.data[0]), + journal_sort_seq_cmp, NULL); + + darray_for_each(keys_sorted, kp) { cond_resched(); + struct journal_key *k = *kp; + replay_now_at(j, k->journal_seq); - ret = bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL| - (!k->allocated - ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim - : 0), + ret = commit_do(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc| + (!k->allocated + ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim + : 0), bch2_journal_replay_key(trans, k)); - if (ret) { - bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", - bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret)); + bch_err_msg(c, ret, "while replaying key at btree %s level %u:", + bch2_btree_id_str(k->btree_id), k->level); + if (ret) goto err; - } + + BUG_ON(!k->overwritten); } + bch2_trans_put(trans); + trans = NULL; + replay_now_at(j, j->replay_journal_seq_end); j->replay_journal_seq = 0; @@ -198,10 +224,10 @@ static int bch2_journal_replay(struct bch_fs *c) if (keys->nr && !ret) bch2_journal_log_msg(c, "journal replay finished"); err: - kvfree(keys_sorted); - - if (ret) - bch_err_fn(c, ret); + if (trans) + bch2_trans_put(trans); + darray_exit(&keys_sorted); + bch_err_fn(c, ret); return ret; } @@ -468,7 +494,7 @@ err: noinline_for_stack static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { - int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, + int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, __bch2_fs_upgrade_for_subvolumes(trans)); if (ret) bch_err_fn(c, ret); @@ -489,7 +515,19 @@ static int bch2_check_allocations(struct bch_fs *c) static int bch2_set_may_go_rw(struct bch_fs *c) { + struct journal_keys *keys = &c->journal_keys; + + /* + * After we go RW, the journal keys buffer can't be modified (except for + * setting journal_key->overwritten: it will be accessed by multiple + * threads + */ + move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); + keys->gap = keys->nr; + set_bit(BCH_FS_MAY_GO_RW, &c->flags); + if (keys->nr) + return bch2_fs_read_write_early(c); return 0; } |