author	Kent Overstreet <kent.overstreet@gmail.com>	2015-06-18 18:48:03 -0700
committer	Kent Overstreet <kent.overstreet@gmail.com>	2015-06-18 18:48:03 -0700
commit	acced30e87ef6a736f0677ecd472b5654e80f2d8 (patch)
tree	2e98c4a647dd733b87fc4dc1a4e9029a488c9548
parent	b9eabb7e6c019f4dcda5733035e39003ab84f984 (diff)
atomicrenames
-rw-r--r--	AtomicRenames.mdwn	184
1 file changed, 184 insertions(+), 0 deletions(-)
diff --git a/AtomicRenames.mdwn b/AtomicRenames.mdwn
new file mode 100644
index 0000000..94ff121
--- /dev/null
+++ b/AtomicRenames.mdwn
@@ -0,0 +1,184 @@
+[[!format c """
+
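+/*
+ * One update in a multi-key transaction: an iterator positioned at the key's
+ * btree position, plus the key to be inserted there.
+ */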
+struct btree_insert_multi {
+	struct btree_iter iter;
+	struct bkey_i *k;
+};
+
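+/*
+ * Take (or drop) the write lock on the leaf node entry @nr points at, unless
+ * an earlier entry in the transaction points at the same node: each distinct
+ * node is locked and unlocked exactly once, via its first occurrence.
+ */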
+static void multi_lock_write(struct btree_insert_multi *m, unsigned nr)
+{
+	struct btree *b = m[nr].iter.nodes[0];
+	unsigned i;
+
+	for (i = 0; i < nr; i++)
+		if (b == m[i].iter.nodes[0])
+			return; /* already locked */
+
+	btree_node_lock_for_insert(b, &m[nr].iter);
+}
+
+static void multi_unlock_write(struct btree_insert_multi *m, unsigned nr)
+{
+	struct btree *b = m[nr].iter.nodes[0];
+	unsigned i;
+
+	for (i = 0; i < nr; i++)
+		if (b == m[i].iter.nodes[0])
+			return; /* lock owned by an earlier entry */
+
+	btree_node_unlock_write(b, &m[nr].iter);
+}
+
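+/*
+ * Insert @nr keys, each at its own iterator's position, as a single atomic
+ * update: one journal reservation covers all the keys, and every affected
+ * leaf node is write locked before any key is inserted.  If some node doesn't
+ * have room, all the locks are dropped, that node is split, and the whole
+ * operation is retried.
+ */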
+int bch_btree_insert_at_multi(struct btree_insert_multi *m, unsigned nr,
+			      u64 *journal_seq, unsigned flags)
+{
+	struct cache_set *c = m[0].iter.c;
+	struct journal_res res = { 0, 0 };
+	struct btree_insert_multi *i;
+	struct btree_iter *split;
+	unsigned u64s = 0;
+
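+	/* the journal reservation has to cover every key in the transaction: */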
+	for (i = m; i < m + nr; i++)
+		u64s += jset_u64s(i->k->k.u64s);
+retry:
+	if (bch_journal_res_get(&c->journal, &res, u64s, u64s))
+		return -EIO;
+
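+	/* write lock each leaf node (once per distinct node) and check for room: */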
+	for (i = m; i < m + nr; i++) {
+		struct btree *b = i->iter.nodes[0];
+
+		multi_lock_write(m, i - m);
+
+		if (!__have_enough_space(b, i->k->k.u64s))
+			goto split;
+	}
+
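+	/* every node is locked and has room: the inserts cannot fail now */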
+	for (i = m; i < m + nr; i++)
+		BUG_ON(!btree_insert_key(&i->iter, i->iter.nodes[0],
+					 &i->iter.node_iters[0],
+					 &keylist_single(i->k), NULL,
+					 &res, journal_seq, flags));
+
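+	/* i == m + nr here; drop the write locks in reverse order: */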
+	do {
+		multi_unlock_write(m, --i - m);
+	} while (i != m);
+
+	bch_journal_res_put(&c->journal, &res);
+
+	for (i = m; i < m + nr; i++)
+		bch_btree_node_write_lazy(i->iter.nodes[0], &i->iter);
+
+	return 0;
+split:
+	split = &i->iter;
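+	/* back out: drop the write locks taken so far, in reverse order: */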
+	do {
+		multi_unlock_write(m, i - m);
+	} while (i-- != m);
+
+	/*
+	 * XXX: Do we need to drop our journal res for the split?
+	 */
+	bch_journal_res_put(&c->journal, &res);
+
+	{
+		struct btree *b = split->nodes[0];
+		struct btree_split_state state;
+		int ret;
+
+		closure_init_stack(&state.stack_cl);
+		bch_keylist_init(&state.parent_keys,
+				 state.inline_keys,
+				 ARRAY_SIZE(state.inline_keys));
+
+		/*
+		 * XXX: figure out how far we might need to split,
+		 * instead of locking/reserving all the way to the root:
+		 */
+		split->locks_want = BTREE_MAX_DEPTH;
+		if (!bch_btree_iter_upgrade(split))
+			return -EINTR;
+
+		state.reserve = bch_btree_reserve_get(c, b, split, 0,
+					!(flags & BTREE_INSERT_NOFAIL));
+		if (IS_ERR(state.reserve))
+			return PTR_ERR(state.reserve);
+
+		ret = btree_split(b, split, NULL, flags, &state);
+
+		bch_btree_reserve_put(c, state.reserve);
+
+		if (ret)
+			return ret;
+		goto retry;
+	}
+}
+
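+/*
+ * Rename a dirent by inserting a whiteout at the source and the new dirent at
+ * the destination in one atomic multi-key update, so lookups never see an
+ * intermediate state.  The two iterators are linked so both positions can be
+ * held at once, and the whole sequence is retried if the insert returns
+ * -EINTR.
+ */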
+int bch_dirent_rename(struct cache_set *c,
+		      u64 src_dir, const struct qstr *src_name,
+		      u64 dst_dir, const struct qstr *dst_name,
+		      u64 *journal_seq, bool overwriting)
+{
+	struct btree_insert_multi trans[2];
+	struct btree_iter *src_iter = &trans[0].iter;
+	struct btree_iter *dst_iter = &trans[1].iter;
+	struct bkey_s_c k;
+	struct bkey_s_c_dirent src;
+	struct bkey_i_dirent *dst;
+	struct bkey_i delete;
+	struct keylist keys;
+	int ret;
+
+	dst = dirent_create_key(&keys, 0, dst_name, 0);
+	if (!dst)
+		return -ENOMEM;
+
+	bch_btree_iter_init_intent(src_iter, c, BTREE_ID_DIRENTS,
+				   POS(src_dir, bch_dirent_hash(src_name)));
+	bch_btree_iter_init_intent(dst_iter, c, BTREE_ID_DIRENTS,
+				   POS(dst_dir, bch_dirent_hash(dst_name)));
+	bch_btree_iter_link(src_iter, dst_iter);
+
+	do {
+		k = __dirent_find(src_iter, src_dir, src_name);
+		if (IS_ERR(k.k)) {
+			ret = PTR_ERR(k.k);
+			goto err;
+		}
+
+		src = bkey_s_c_to_dirent(k);
+
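+		/* the source dirent gets deleted by overwriting it with a whiteout: */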
+		bkey_init(&delete.k);
+		delete.k.p = src.k->p;
+		delete.k.type = BCH_DIRENT_WHITEOUT;
+
+		k = overwriting
+			? __dirent_find(dst_iter, dst_dir, dst_name)
+			: __dirent_find_hole(dst_iter, dst_dir, dst_name);
+		if (IS_ERR(k.k)) {
+			ret = PTR_ERR(k.k);
+			goto err;
+		}
+
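+		/* fill in the new dirent from the source, at the destination's position: */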
+		dst->v.d_inum = src.v->d_inum;
+		dst->v.d_type = src.v->d_type;
+		dst->k.p = k.k->p;
+
+		trans[0].k = &delete;
+		trans[1].k = &dst->k_i;
+
+		ret = bch_btree_insert_at_multi(trans, ARRAY_SIZE(trans),
+						journal_seq, 0);
+		bch_btree_iter_unlock(src_iter);
+		bch_btree_iter_unlock(dst_iter);
+	} while (ret == -EINTR);
+
+	bch_keylist_free(&keys);
+	return ret;
+err:
+	ret = bch_btree_iter_unlock(src_iter) ?: ret;
+	ret = bch_btree_iter_unlock(dst_iter) ?: ret;
+	bch_keylist_free(&keys);
+	return ret;
+}
+
+"""]]