summary | refs | log | tree | commit | diff
path: root/fs/bcachefs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- fs/bcachefs/bcachefs.h | 4
-rw-r--r-- fs/bcachefs/bcachefs_format.h | 3
-rw-r--r-- fs/bcachefs/btree_io.c | 182
-rw-r--r-- fs/bcachefs/btree_io.h | 11
-rw-r--r-- fs/bcachefs/btree_iter.h | 2
-rw-r--r-- fs/bcachefs/btree_types.h | 2
-rw-r--r-- fs/bcachefs/btree_update.h | 4
-rw-r--r-- fs/bcachefs/btree_update_interior.c | 194
-rw-r--r-- fs/bcachefs/btree_update_leaf.c | 3
-rw-r--r-- fs/bcachefs/io_types.h | 3
-rw-r--r-- fs/bcachefs/migrate.c | 2
-rw-r--r-- fs/bcachefs/recovery.c | 5
-rw-r--r-- fs/bcachefs/super.c | 9
13 files changed, 229 insertions, 195 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 125cf0928bcf..051aba63eaa5 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -675,7 +675,7 @@ struct bch_fs {
struct btree_key_cache btree_key_cache;
struct workqueue_struct *btree_update_wq;
- struct workqueue_struct *btree_error_wq;
+ struct workqueue_struct *btree_io_complete_wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
@@ -826,8 +826,6 @@ struct bch_fs {
atomic64_t btree_writes_nr;
atomic64_t btree_writes_sectors;
- struct bio_list btree_write_error_list;
- struct work_struct btree_write_error_work;
spinlock_t btree_write_error_lock;
/* ERRORS */
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 94273d5161f2..23a359ec2c8f 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1210,7 +1210,8 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_inode_btree_change = 11,
bcachefs_metadata_version_snapshot = 12,
bcachefs_metadata_version_inode_backpointers = 13,
- bcachefs_metadata_version_max = 14,
+ bcachefs_metadata_version_btree_ptr_sectors_written = 14,
+ bcachefs_metadata_version_max = 15,
};
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 957a6a9a1559..417f0fc00357 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -26,6 +26,7 @@ void bch2_btree_node_io_unlock(struct btree *b)
{
EBUG_ON(!btree_node_write_in_flight(b));
+ clear_btree_node_write_in_flight_inner(b);
clear_btree_node_write_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
}
@@ -870,7 +871,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
unsigned u64s;
- unsigned nonblacklisted_written = 0;
+ unsigned blacklisted_written, nonblacklisted_written = 0;
+ unsigned ptr_written = btree_ptr_sectors_written(&b->key);
int ret, retry_read = 0, write = READ;
b->version_ondisk = U16_MAX;
@@ -1013,8 +1015,21 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
* important that we start writing new bsets after the most recent _non_
* blacklisted bset:
*/
+ blacklisted_written = b->written;
b->written = nonblacklisted_written;
+ if (ptr_written) {
+ btree_err_on(b->written < ptr_written,
+ BTREE_ERR_WANT_RETRY, c, ca, b, NULL,
+ "btree node data missing: expected %u sectors, found %u blacklisted %u",
+ ptr_written, b->written, blacklisted_written);
+
+ btree_err_on(b->written > ptr_written,
+ BTREE_ERR_WANT_RETRY, c, ca, b, NULL,
+ "btree ptr not fully updated: expected %u sectors, found %u blacklisted %u",
+ ptr_written, b->written, blacklisted_written);
+ }
+
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
sorted->keys.u64s = 0;
@@ -1578,6 +1593,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
goto do_write;
new &= ~(1U << BTREE_NODE_write_in_flight);
+ new &= ~(1U << BTREE_NODE_write_in_flight_inner);
} while ((v = cmpxchg(&b->flags, old, new)) != old);
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
@@ -1596,10 +1612,12 @@ do_write:
new &= ~(1U << BTREE_NODE_dirty);
new &= ~(1U << BTREE_NODE_need_write);
new |= (1U << BTREE_NODE_write_in_flight);
+ new |= (1U << BTREE_NODE_write_in_flight_inner);
new |= (1U << BTREE_NODE_just_written);
new ^= (1U << BTREE_NODE_write_idx);
} else {
new &= ~(1U << BTREE_NODE_write_in_flight);
+ new &= ~(1U << BTREE_NODE_write_in_flight_inner);
}
} while ((v = cmpxchg(&b->flags, old, new)) != old);
@@ -1609,52 +1627,38 @@ do_write:
six_unlock_read(&b->c.lock);
}
-static void bch2_btree_node_write_error(struct bch_fs *c,
- struct btree_write_bio *wbio)
+static void btree_node_write_work(struct work_struct *work)
{
+ struct btree_write_bio *wbio =
+ container_of(work, struct btree_write_bio, work);
+ struct bch_fs *c = wbio->wbio.c;
struct btree *b = wbio->wbio.bio.bi_private;
- struct bkey_buf k;
struct bch_extent_ptr *ptr;
- struct btree_trans trans;
- struct btree_iter *iter;
int ret;
- bch2_bkey_buf_init(&k);
- bch2_trans_init(&trans, c, 0, 0);
-
- iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p,
- BTREE_MAX_DEPTH, b->c.level, 0);
-retry:
- ret = bch2_btree_iter_traverse(iter);
- if (ret)
- goto err;
-
- /* has node been freed? */
- if (iter->l[b->c.level].b != b) {
- /* node has been freed: */
- BUG_ON(!btree_node_dying(b));
- goto out;
- }
-
- BUG_ON(!btree_node_hashed(b));
-
- bch2_bkey_buf_copy(&k, c, &b->key);
+ btree_bounce_free(c,
+ wbio->data_bytes,
+ wbio->wbio.used_mempool,
+ wbio->data);
- bch2_bkey_drop_ptrs(bkey_i_to_s(k.k), ptr,
+ bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr,
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
- if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(k.k)))
+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key)))
goto err;
- ret = bch2_btree_node_update_key(&trans, iter, b, k.k);
- if (ret == -EINTR)
- goto retry;
- if (ret)
- goto err;
+ if (wbio->wbio.first_btree_write) {
+ if (wbio->wbio.failed.nr) {
+
+ }
+ } else {
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_btree_node_update_key_get_iter(&trans, b, &wbio->key,
+ !wbio->wbio.failed.nr));
+ if (ret)
+ goto err;
+ }
out:
- bch2_trans_iter_put(&trans, iter);
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&k, c);
bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b);
return;
@@ -1664,58 +1668,14 @@ err:
goto out;
}
-void bch2_btree_write_error_work(struct work_struct *work)
-{
- struct bch_fs *c = container_of(work, struct bch_fs,
- btree_write_error_work);
- struct bio *bio;
-
- while (1) {
- spin_lock_irq(&c->btree_write_error_lock);
- bio = bio_list_pop(&c->btree_write_error_list);
- spin_unlock_irq(&c->btree_write_error_lock);
-
- if (!bio)
- break;
-
- bch2_btree_node_write_error(c,
- container_of(bio, struct btree_write_bio, wbio.bio));
- }
-}
-
-static void btree_node_write_work(struct work_struct *work)
-{
- struct btree_write_bio *wbio =
- container_of(work, struct btree_write_bio, work);
- struct bch_fs *c = wbio->wbio.c;
- struct btree *b = wbio->wbio.bio.bi_private;
-
- btree_bounce_free(c,
- wbio->bytes,
- wbio->wbio.used_mempool,
- wbio->data);
-
- if (wbio->wbio.failed.nr) {
- unsigned long flags;
-
- spin_lock_irqsave(&c->btree_write_error_lock, flags);
- bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio);
- spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
-
- queue_work(c->btree_error_wq, &c->btree_write_error_work);
- return;
- }
-
- bio_put(&wbio->wbio.bio);
- btree_node_write_done(c, b);
-}
-
static void btree_node_write_endio(struct bio *bio)
{
struct bch_write_bio *wbio = to_wbio(bio);
struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
struct bch_write_bio *orig = parent ?: wbio;
+ struct btree_write_bio *wb = container_of(orig, struct btree_write_bio, wbio);
struct bch_fs *c = wbio->c;
+ struct btree *b = wbio->bio.bi_private;
struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
unsigned long flags;
@@ -1736,13 +1696,13 @@ static void btree_node_write_endio(struct bio *bio)
if (parent) {
bio_put(bio);
bio_endio(&parent->bio);
- } else {
- struct btree_write_bio *wb =
- container_of(orig, struct btree_write_bio, wbio);
-
- INIT_WORK(&wb->work, btree_node_write_work);
- queue_work(c->io_complete_wq, &wb->work);
+ return;
}
+
+ clear_btree_node_write_in_flight_inner(b);
+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
+ INIT_WORK(&wb->work, btree_node_write_work);
+ queue_work(c->btree_io_complete_wq, &wb->work);
}
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
@@ -1767,8 +1727,15 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
static void btree_write_submit(struct work_struct *work)
{
struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work);
+ struct bch_extent_ptr *ptr;
+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+
+ bkey_copy(&tmp.k, &wbio->key);
+
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&tmp.k)), ptr)
+ ptr->offset += wbio->sector_offset;
- bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &wbio->key);
+ bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &tmp.k);
}
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_started)
@@ -1778,7 +1745,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta
struct bset *i;
struct btree_node *bn = NULL;
struct btree_node_entry *bne = NULL;
- struct bch_extent_ptr *ptr;
struct sort_iter sort_iter;
struct nonce nonce;
unsigned bytes_to_write, sectors_to_write, bytes, u64s;
@@ -1818,6 +1784,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta
new &= ~(1 << BTREE_NODE_dirty);
new &= ~(1 << BTREE_NODE_need_write);
new |= (1 << BTREE_NODE_write_in_flight);
+ new |= (1 << BTREE_NODE_write_in_flight_inner);
new |= (1 << BTREE_NODE_just_written);
new ^= (1 << BTREE_NODE_write_idx);
} while (cmpxchg_acquire(&b->flags, old, new) != old);
@@ -1969,37 +1936,30 @@ do_write:
struct btree_write_bio, wbio.bio);
wbio_init(&wbio->wbio.bio);
wbio->data = data;
- wbio->bytes = bytes;
+ wbio->data_bytes = bytes;
+ wbio->sector_offset = b->written;
wbio->wbio.c = c;
wbio->wbio.used_mempool = used_mempool;
+ wbio->wbio.first_btree_write = !b->written;
wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META;
wbio->wbio.bio.bi_end_io = btree_node_write_endio;
wbio->wbio.bio.bi_private = b;
bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
- /*
- * If we're appending to a leaf node, we don't technically need FUA -
- * this write just needs to be persisted before the next journal write,
- * which will be marked FLUSH|FUA.
- *
- * Similarly if we're writing a new btree root - the pointer is going to
- * be in the next journal entry.
- *
- * But if we're writing a new btree node (that isn't a root) or
- * appending to a non leaf btree node, we need either FUA or a flush
- * when we write the parent with the new pointer. FUA is cheaper than a
- * flush, and writes appending to leaf nodes aren't blocking anything so
- * just make all btree node writes FUA to keep things sane.
- */
-
bkey_copy(&wbio->key, &b->key);
- bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&wbio->key)), ptr)
- ptr->offset += b->written;
-
b->written += sectors_to_write;
+ if (wbio->wbio.first_btree_write &&
+ b->key.k.type == KEY_TYPE_btree_ptr_v2)
+ bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+ cpu_to_le16(b->written);
+
+ if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
+ bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
+ cpu_to_le16(b->written);
+
atomic64_inc(&c->btree_writes_nr);
atomic64_add(sectors_to_write, &c->btree_writes_sectors);
@@ -2008,6 +1968,10 @@ do_write:
return;
err:
set_btree_node_noevict(b);
+ if (!b->written &&
+ b->key.k.type == KEY_TYPE_btree_ptr_v2)
+ bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+ cpu_to_le16(sectors_to_write);
b->written += sectors_to_write;
nowrite:
btree_bounce_free(c, bytes, used_mempool, data);
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 3732d135de8d..7fdcf879c7d4 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -32,6 +32,13 @@ static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
atomic_dec(&c->btree_cache.dirty);
}
+static inline unsigned btree_ptr_sectors_written(struct bkey_i *k)
+{
+ return k->k.type == KEY_TYPE_btree_ptr_v2
+ ? le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written)
+ : 0;
+}
+
struct btree_read_bio {
struct bch_fs *c;
struct btree *b;
@@ -48,7 +55,8 @@ struct btree_write_bio {
struct work_struct work;
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
void *data;
- unsigned bytes;
+ unsigned data_bytes;
+ unsigned sector_offset;
struct bch_write_bio wbio;
};
@@ -137,7 +145,6 @@ int bch2_btree_root_read(struct bch_fs *, enum btree_id,
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
struct btree_write *);
-void bch2_btree_write_error_work(struct work_struct *);
void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 31175cf00c0a..84f91c9b68e3 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -130,7 +130,7 @@ void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned);
static inline void bch2_btree_iter_downgrade(struct btree_iter *iter)
{
- unsigned new_locks_want = (iter->flags & BTREE_ITER_INTENT ? 1 : 0);
+ unsigned new_locks_want = iter->level + !!(iter->flags & BTREE_ITER_INTENT);
if (iter->locks_want > new_locks_want)
__bch2_btree_iter_downgrade(iter, new_locks_want);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index a2581500b791..07c9ba4ea475 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -435,6 +435,7 @@ enum btree_flags {
BTREE_NODE_write_idx,
BTREE_NODE_accessed,
BTREE_NODE_write_in_flight,
+ BTREE_NODE_write_in_flight_inner,
BTREE_NODE_just_written,
BTREE_NODE_dying,
BTREE_NODE_fake,
@@ -449,6 +450,7 @@ BTREE_FLAG(noevict);
BTREE_FLAG(write_idx);
BTREE_FLAG(accessed);
BTREE_FLAG(write_in_flight);
+BTREE_FLAG(write_in_flight_inner);
BTREE_FLAG(just_written);
BTREE_FLAG(dying);
BTREE_FLAG(fake);
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 12065bba82dd..bab135fae0b0 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -74,7 +74,9 @@ int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
__le64, unsigned);
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
- struct btree *, struct bkey_i *);
+ struct btree *, struct bkey_i *, bool);
+int bch2_btree_node_update_key_get_iter(struct btree_trans *,
+ struct btree *, struct bkey_i *, bool);
int bch2_trans_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, enum btree_update_flags);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 6b55a4108425..9f32a8892213 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -246,11 +246,7 @@ retry:
goto retry;
}
- if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2))
- bkey_btree_ptr_v2_init(&tmp.k);
- else
- bkey_btree_ptr_init(&tmp.k);
-
+ bkey_btree_ptr_v2_init(&tmp.k);
bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
bch2_open_bucket_get(c, wp, &ob);
@@ -567,7 +563,8 @@ static void btree_update_nodes_written(struct btree_update *as)
six_unlock_read(&old->c.lock);
if (seq == as->old_nodes_seq[i])
- bch2_btree_node_wait_on_write(old);
+ wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner,
+ TASK_UNINTERRUPTIBLE);
}
/*
@@ -1148,6 +1145,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
struct bkey_packed *k;
const char *invalid;
+ BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
+ !btree_ptr_sectors_written(insert));
+
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
if (invalid) {
@@ -1390,6 +1390,7 @@ static void btree_split(struct btree_update *as,
six_unlock_write(&n2->c.lock);
six_unlock_write(&n1->c.lock);
+ bch2_btree_node_write(c, n1, SIX_LOCK_intent);
bch2_btree_node_write(c, n2, SIX_LOCK_intent);
/*
@@ -1417,12 +1418,12 @@ static void btree_split(struct btree_update *as,
bch2_btree_build_aux_trees(n1);
six_unlock_write(&n1->c.lock);
+ bch2_btree_node_write(c, n1, SIX_LOCK_intent);
+
if (parent)
bch2_keylist_add(&as->parent_keys, &n1->key);
}
- bch2_btree_node_write(c, n1, SIX_LOCK_intent);
-
/* New nodes all written, now make them visible: */
if (parent) {
@@ -1698,13 +1699,13 @@ retry:
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock);
+ bch2_btree_node_write(c, n, SIX_LOCK_intent);
+
bkey_init(&delete.k);
delete.k.p = prev->key.k.p;
bch2_keylist_add(&as->parent_keys, &delete);
bch2_keylist_add(&as->parent_keys, &n->key);
- bch2_btree_node_write(c, n, SIX_LOCK_intent);
-
bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags);
bch2_btree_update_get_open_buckets(as, n);
@@ -1878,74 +1879,109 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
queue_work(c->btree_interior_update_worker, &a->work);
}
-static void __bch2_btree_node_update_key(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_iter *iter,
- struct btree *b, struct btree *new_hash,
- struct bkey_i *new_key)
+static int __bch2_btree_node_update_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct btree *b, struct btree *new_hash,
+ struct bkey_i *new_key,
+ bool skip_triggers)
{
- struct bch_fs *c = as->c;
+ struct bch_fs *c = trans->c;
+ struct btree_iter *iter2 = NULL;
struct btree *parent;
+ u64 journal_entries[BKEY_BTREE_PTR_U64s_MAX];
int ret;
- btree_update_will_delete_key(as, &b->key);
- btree_update_will_add_key(as, new_key);
+ if (!skip_triggers) {
+ ret = bch2_trans_mark_key(trans,
+ bkey_s_c_null,
+ bkey_i_to_s_c(new_key),
+ BTREE_TRIGGER_INSERT);
+ if (ret)
+ return ret;
+
+ ret = bch2_trans_mark_key(trans,
+ bkey_i_to_s_c(&b->key),
+ bkey_s_c_null,
+ BTREE_TRIGGER_OVERWRITE);
+ if (ret)
+ return ret;
+ }
+
+ if (new_hash) {
+ bkey_copy(&new_hash->key, new_key);
+ ret = bch2_btree_node_hash_insert(&c->btree_cache,
+ new_hash, b->c.level, b->c.btree_id);
+ BUG_ON(ret);
+ }
parent = btree_node_parent(iter, b);
if (parent) {
- if (new_hash) {
- bkey_copy(&new_hash->key, new_key);
- ret = bch2_btree_node_hash_insert(&c->btree_cache,
- new_hash, b->c.level, b->c.btree_id);
- BUG_ON(ret);
- }
+ iter2 = bch2_trans_copy_iter(trans, iter);
- bch2_keylist_add(&as->parent_keys, new_key);
- bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, 0);
+ BUG_ON(iter2->level != b->c.level);
+ BUG_ON(bpos_cmp(iter2->pos, new_key->k.p));
- if (new_hash) {
- mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
+ btree_node_unlock(iter2, iter2->level);
+ iter2->l[iter2->level].b = BTREE_ITER_NO_NODE_UP;
+ iter2->level++;
- bch2_btree_node_hash_remove(&c->btree_cache, b);
-
- bkey_copy(&b->key, new_key);
- ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
- BUG_ON(ret);
- mutex_unlock(&c->btree_cache.lock);
- } else {
- bkey_copy(&b->key, new_key);
- }
+ ret = bch2_btree_iter_traverse(iter2) ?:
+ bch2_trans_update(trans, iter2, new_key, BTREE_TRIGGER_NORUN);
+ if (ret)
+ goto err;
} else {
BUG_ON(btree_node_root(c, b) != b);
- bch2_btree_node_lock_write(b, iter);
- bkey_copy(&b->key, new_key);
+ trans->extra_journal_entries = (void *) &journal_entries[0];
+ trans->extra_journal_entry_u64s =
+ journal_entry_set((void *) &journal_entries[0],
+ BCH_JSET_ENTRY_btree_root,
+ b->c.btree_id, b->c.level,
+ new_key, new_key->k.u64s);
+ }
- if (btree_ptr_hash_val(&b->key) != b->hash_val) {
- mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+ ret = bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_JOURNAL_RECLAIM|
+ BTREE_INSERT_JOURNAL_RESERVED|
+ BTREE_INSERT_NOUNLOCK);
+ if (ret)
+ goto err;
- ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
- BUG_ON(ret);
- mutex_unlock(&c->btree_cache.lock);
- }
+ bch2_btree_node_lock_write(b, iter);
- btree_update_updated_root(as, b);
- bch2_btree_node_unlock_write(b, iter);
+ if (new_hash) {
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+ bkey_copy(&b->key, new_key);
+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+ BUG_ON(ret);
+ mutex_unlock(&c->btree_cache.lock);
+ } else {
+ bkey_copy(&b->key, new_key);
}
- bch2_btree_update_done(as);
+ bch2_btree_node_unlock_write(b, iter);
+out:
+ bch2_trans_iter_put(trans, iter2);
+ return ret;
+err:
+ if (new_hash) {
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_unlock(&c->btree_cache.lock);
+ }
+ goto out;
}
-int bch2_btree_node_update_key(struct btree_trans *trans,
- struct btree_iter *iter,
- struct btree *b,
- struct bkey_i *new_key)
+int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *iter,
+ struct btree *b, struct bkey_i *new_key,
+ bool skip_triggers)
{
struct bch_fs *c = trans->c;
- struct btree *parent = btree_node_parent(iter, b);
- struct btree_update *as = NULL;
struct btree *new_hash = NULL;
struct closure cl;
int ret = 0;
@@ -1959,27 +1995,18 @@ int bch2_btree_node_update_key(struct btree_trans *trans,
if (btree_ptr_hash_val(new_key) != b->hash_val) {
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
if (ret) {
- bch2_trans_unlock(iter->trans);
+ bch2_trans_unlock(trans);
closure_sync(&cl);
- if (!bch2_trans_relock(iter->trans))
+ if (!bch2_trans_relock(trans))
return -EINTR;
}
new_hash = bch2_btree_node_mem_alloc(c);
}
- as = bch2_btree_update_start(iter, b->c.level,
- parent ? btree_update_reserve_required(c, parent) : 0,
- BTREE_INSERT_NOFAIL);
- if (IS_ERR(as)) {
- ret = PTR_ERR(as);
- goto err;
- }
-
- __bch2_btree_node_update_key(as, trans, iter, b, new_hash, new_key);
+ ret = __bch2_btree_node_update_key(trans, iter, b, new_hash,
+ new_key, skip_triggers);
- bch2_btree_iter_downgrade(iter);
-err:
if (new_hash) {
mutex_lock(&c->btree_cache.lock);
list_move(&new_hash->list, &c->btree_cache.freeable);
@@ -1993,6 +2020,35 @@ err:
return ret;
}
+int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
+ struct btree *b, struct bkey_i *new_key,
+ bool skip_triggers)
+{
+ struct btree_iter *iter;
+ int ret;
+
+ iter = bch2_trans_get_node_iter(trans, b->c.btree_id, b->key.k.p,
+ BTREE_MAX_DEPTH, b->c.level,
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ goto out;
+
+ /* has node been freed? */
+ if (iter->l[b->c.level].b != b) {
+ /* node has been freed: */
+ BUG_ON(!btree_node_dying(b));
+ goto out;
+ }
+
+ BUG_ON(!btree_node_hashed(b));
+
+ ret = bch2_btree_node_update_key(trans, iter, b, new_key, skip_triggers);
+out:
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+
/* Init code: */
/*
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 0843e2c395aa..d319e27aed9f 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -890,7 +890,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
unsigned u64s, reset_flags = 0;
int ret = 0;
- if (!trans->nr_updates)
+ if (!trans->nr_updates &&
+ !trans->extra_journal_entry_u64s)
goto out_reset;
if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h
index e7aca7c9823a..0aab77951c4c 100644
--- a/fs/bcachefs/io_types.h
+++ b/fs/bcachefs/io_types.h
@@ -94,7 +94,8 @@ struct bch_write_bio {
bounce:1,
put_bio:1,
have_ioref:1,
- used_mempool:1;
+ used_mempool:1,
+ first_btree_write:1;
struct bio bio;
};
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index aacd6385db1f..1f65eca48c6e 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -139,7 +139,7 @@ retry:
break;
}
- ret = bch2_btree_node_update_key(&trans, iter, b, k.k);
+ ret = bch2_btree_node_update_key(&trans, iter, b, k.k, false);
if (ret == -EINTR) {
b = bch2_btree_iter_peek_node(iter);
ret = 0;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index c6fa4ca31ae9..84e224fb0d01 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1005,6 +1005,11 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.fix_errors = FSCK_OPT_YES;
}
+ if (c->sb.version < bcachefs_metadata_version_btree_ptr_sectors_written) {
+ bch_info(c, "version prior to btree_ptr_sectors_written, upgrade required");
+ c->opts.version_upgrade = true;
+ }
+
ret = bch2_blacklist_table_initialize(c);
if (ret) {
bch_err(c, "error initializing blacklist table");
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 13a5ca713e7a..ce8e5d4843d0 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -507,8 +507,8 @@ static void __bch2_fs_free(struct bch_fs *c)
destroy_workqueue(c->io_complete_wq );
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
- if (c->btree_error_wq)
- destroy_workqueue(c->btree_error_wq);
+ if (c->btree_io_complete_wq)
+ destroy_workqueue(c->btree_io_complete_wq);
if (c->btree_update_wq)
destroy_workqueue(c->btree_update_wq);
@@ -560,7 +560,6 @@ void __bch2_fs_stop(struct bch_fs *c)
for_each_member_device(ca, c, i)
cancel_work_sync(&ca->io_error_work);
- cancel_work_sync(&c->btree_write_error_work);
cancel_work_sync(&c->read_only_work);
for (i = 0; i < c->sb.nr_devices; i++)
@@ -688,9 +687,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->bio_bounce_pages_lock);
- bio_list_init(&c->btree_write_error_list);
spin_lock_init(&c->btree_write_error_lock);
- INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
INIT_WORK(&c->journal_seq_blacklist_gc_work,
bch2_blacklist_entries_gc);
@@ -760,7 +757,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
- !(c->btree_error_wq = alloc_workqueue("bcachefs_error",
+ !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||