summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2016-05-24 15:53:15 -0800
committerKent Overstreet <kent.overstreet@gmail.com>2017-01-18 21:39:32 -0900
commit5818a280d149df79f8ae76e48646c45b4dbb2f5d (patch)
treed6e0b73a400d2f7e4045b98e48a44038fefe06e2
parent60f5b5a0e67e6ab36955a13f0e429852299fbb99 (diff)
bcache: btree as hash table improvements
-rw-r--r--drivers/md/bcache/bkey.h7
-rw-r--r--drivers/md/bcache/dirent.c320
-rw-r--r--drivers/md/bcache/fs.c4
-rw-r--r--drivers/md/bcache/fs.h5
-rw-r--r--drivers/md/bcache/str_hash.h272
-rw-r--r--drivers/md/bcache/xattr.c247
6 files changed, 502 insertions(+), 353 deletions(-)
diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h
index 08b805408d8f..881c5ebea4f3 100644
--- a/drivers/md/bcache/bkey.h
+++ b/drivers/md/bcache/bkey.h
@@ -329,8 +329,11 @@ static inline void bkey_reassemble(struct bkey_i *dst,
memcpy(&dst->v, src.v, bkey_val_bytes(src.k));
}
-#define bkey_s_null ((struct bkey_s) { .k = NULL, .v = NULL })
-#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL, .v = NULL })
+#define bkey_s_null ((struct bkey_s) { .k = NULL })
+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL })
+
+#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) })
+#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) })
static inline struct bkey_s bkey_to_s(struct bkey *k)
{
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index 40d49f8593f3..6b323912e810 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -8,43 +8,72 @@
#include "keylist.h"
#include "str_hash.h"
-static struct bpos bch_dirent_pos(struct bch_inode_info *ei,
- const struct qstr *name)
+static unsigned dirent_name_bytes(struct bkey_s_c_dirent d)
{
- struct bch_str_hash_ctx ctx;
- u64 hash;
+ unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent);
- bch_str_hash_init(&ctx, ei->str_hash_type);
+ while (len && !d.v->d_name[len - 1])
+ --len;
+
+ return len;
+}
+
+static u64 bch_dirent_hash(const struct bch_hash_info *info,
+ const struct qstr *name)
+{
+ struct bch_str_hash_ctx ctx;
- bch_str_hash_update(&ctx, ei->str_hash_type,
- &ei->str_hash_seed, sizeof(ei->str_hash_seed));
- bch_str_hash_update(&ctx, ei->str_hash_type, name->name, name->len);
+ bch_str_hash_init(&ctx, info->type);
+ bch_str_hash_update(&ctx, info->type, &info->seed, sizeof(info->seed));
- hash = bch_str_hash_end(&ctx, ei->str_hash_type);
+ bch_str_hash_update(&ctx, info->type, name->name, name->len);
/* [0,2) reserved for dots */
+ return max_t(u64, bch_str_hash_end(&ctx, info->type), 2);
+}
- return POS(ei->vfs_inode.i_ino, hash >= 2 ? hash : 2);
+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
+{
+ return bch_dirent_hash(info, key);
}
-static unsigned dirent_name_bytes(struct bkey_s_c_dirent d)
+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
{
- unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent);
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+ struct qstr name = QSTR_INIT(d.v->d_name, dirent_name_bytes(d));
- while (len && !d.v->d_name[len - 1])
- --len;
+ return bch_dirent_hash(info, &name);
+}
- return len;
+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
+{
+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
+ int len = dirent_name_bytes(l);
+ const struct qstr *r = _r;
+
+ return len - r->len ?: memcmp(l.v->d_name, r->name, len);
}
-static int dirent_cmp(struct bkey_s_c_dirent d,
- const struct qstr *q)
+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
{
- int len = dirent_name_bytes(d);
+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
+ struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
+ int l_len = dirent_name_bytes(l);
+ int r_len = dirent_name_bytes(r);
- return len - q->len ?: memcmp(d.v->d_name, q->name, len);
+ return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
}
+static const struct bch_hash_desc dirent_hash_desc = {
+ .btree_id = BTREE_ID_DIRENTS,
+ .key_type = BCH_DIRENT,
+ .whiteout_type = BCH_DIRENT_WHITEOUT,
+ .hash_key = dirent_hash_key,
+ .hash_bkey = dirent_hash_bkey,
+ .cmp_key = dirent_cmp_key,
+ .cmp_bkey = dirent_cmp_bkey,
+};
+
static const char *bch_dirent_invalid(const struct cache_set *c,
struct bkey_s_c k)
{
@@ -98,60 +127,15 @@ const struct bkey_ops bch_bkey_dirent_ops = {
.val_to_text = bch_dirent_to_text,
};
-static bool dirent_needs_whiteout(struct bch_inode_info *ei,
- struct btree_iter *iter)
-{
- struct btree *b = iter->nodes[0];
- /*
- * hack: we don't want to advance @iter, because caller is about to
- * insert at @iter's current position and we can't just rewind it - so,
- * just copy the node iter
- */
- struct btree_node_iter node_iter = iter->node_iters[0];
- struct bkey_s_c k;
- struct bkey u;
- struct bpos cur_pos = iter->pos;
-
- bch_btree_node_iter_advance(&node_iter, &b->keys);
- cur_pos = bkey_successor(cur_pos);
-
- while ((k = bch_btree_node_iter_next_unpack(&node_iter, &b->keys, &u)).k &&
- k.k->p.inode == iter->pos.inode &&
- !bkey_cmp(k.k->p, cur_pos)) {
- if (k.k->type == BCH_DIRENT) {
- struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
- struct qstr name = {
- .name = d.v->d_name,
- .len = dirent_name_bytes(d),
- };
-
- if (bkey_cmp(bch_dirent_pos(ei, &name), iter->pos) <= 0)
- return true;
- }
-
- cur_pos = bkey_successor(cur_pos);
- }
-
- /*
- * end of node, can't (yet) check the next one (would have to keep it
- * locked while we do the deletion)
- */
- if (bkey_cmp(cur_pos, b->key.k.p) > 0)
- return true;
-
- return false;
-}
-
static struct bkey_i_dirent *dirent_create_key(u8 type,
- const struct qstr *name,
- u64 dst)
+ const struct qstr *name, u64 dst)
{
struct bkey_i_dirent *dirent;
unsigned u64s = BKEY_U64s +
DIV_ROUND_UP(sizeof(struct bch_dirent) + name->len,
sizeof(u64));
- dirent = kmalloc(u64s * sizeof(u64), GFP_KERNEL);
+ dirent = kmalloc(u64s * sizeof(u64), GFP_NOFS);
if (!dirent)
return NULL;
@@ -166,74 +150,15 @@ static struct bkey_i_dirent *dirent_create_key(u8 type,
(sizeof(struct bch_dirent) + name->len));
EBUG_ON(dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
- EBUG_ON(dirent_cmp(dirent_i_to_s_c(dirent), name));
return dirent;
}
-static struct bkey_s_c __dirent_find(struct btree_iter *iter,
- u64 dir, const struct qstr *name)
-{
- struct bkey_s_c k;
-
- while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
- if (k.k->p.inode != dir)
- break;
-
- switch (k.k->type) {
- case BCH_DIRENT:
- if (!dirent_cmp(bkey_s_c_to_dirent(k), name))
- return k;
-
- /* hash collision, keep going */
- break;
- case BCH_DIRENT_WHITEOUT:
- /* hash collision, keep going */
- break;
- default:
- /* hole, not found */
- goto not_found;
- }
-
- bch_btree_iter_advance_pos(iter);
- }
-not_found:
- return (struct bkey_s_c) { .k = ERR_PTR(-ENOENT) };
-}
-
-static struct bkey_s_c __dirent_find_hole(struct btree_iter *iter,
- u64 dir, const struct qstr *name)
-{
- struct bkey_s_c k;
-
- while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
- if (k.k->p.inode != dir)
- break;
-
- switch (k.k->type) {
- case BCH_DIRENT:
- if (!dirent_cmp(bkey_s_c_to_dirent(k), name))
- return (struct bkey_s_c) { .k = ERR_PTR(-EEXIST) };
-
- /* hash collision, keep going */
- break;
- default:
- return k;
- }
-
- bch_btree_iter_advance_pos(iter);
- }
-
- return (struct bkey_s_c) { .k = ERR_PTR(-ENOSPC) };
-}
-
int bch_dirent_create(struct inode *dir, u8 type,
const struct qstr *name, u64 dst_inum)
{
struct cache_set *c = dir->i_sb->s_fs_info;
struct bch_inode_info *ei = to_bch_ei(dir);
- struct btree_iter iter;
- struct bkey_s_c k;
struct bkey_i_dirent *dirent;
int ret;
@@ -241,28 +166,9 @@ int bch_dirent_create(struct inode *dir, u8 type,
if (!dirent)
return -ENOMEM;
- bch_btree_iter_init_intent(&iter, c, BTREE_ID_DIRENTS,
- bch_dirent_pos(ei, name));
-
- do {
- k = __dirent_find_hole(&iter, dir->i_ino, name);
- if (IS_ERR(k.k)) {
- ret = bch_btree_iter_unlock(&iter) ?: PTR_ERR(k.k);
- break;
- }
-
- dirent->k.p = k.k->p;
-
- ret = bch_btree_insert_at(&iter, &dirent->k_i,
- NULL, NULL, &ei->journal_seq,
- BTREE_INSERT_ATOMIC);
- /*
- * XXX: if we ever cleanup whiteouts, we may need to rewind
- * iterator on -EINTR
- */
- } while (ret == -EINTR);
-
- bch_btree_iter_unlock(&iter);
+ ret = bch_hash_set(dirent_hash_desc, &ei->str_hash, c,
+ ei->vfs_inode.i_ino, &ei->journal_seq,
+ &dirent->k_i, 0);
kfree(dirent);
return ret;
@@ -275,6 +181,12 @@ static void dirent_copy_target(struct bkey_i_dirent *dst,
dst->v.d_type = src.v->d_type;
}
+static struct bpos bch_dirent_pos(struct bch_inode_info *ei,
+ const struct qstr *name)
+{
+ return POS(ei->vfs_inode.i_ino, bch_dirent_hash(&ei->str_hash, name));
+}
+
int bch_dirent_rename(struct cache_set *c,
struct inode *src_dir, const struct qstr *src_name,
struct inode *dst_dir, const struct qstr *dst_name,
@@ -282,13 +194,13 @@ int bch_dirent_rename(struct cache_set *c,
{
struct bch_inode_info *src_ei = to_bch_ei(src_dir);
struct bch_inode_info *dst_ei = to_bch_ei(dst_dir);
- struct btree_iter src_iter;
- struct btree_iter dst_iter;
+ struct btree_iter src_iter, dst_iter, whiteout_iter;
struct bkey_s_c old_src, old_dst;
struct bkey delete;
struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
struct bpos src_pos = bch_dirent_pos(src_ei, src_name);
struct bpos dst_pos = bch_dirent_pos(dst_ei, dst_name);
+ bool need_whiteout;
int ret = -ENOMEM;
if (mode == BCH_RENAME_EXCHANGE) {
@@ -323,23 +235,38 @@ int bch_dirent_rename(struct cache_set *c,
* lock ordering.
*/
if (bkey_cmp(src_iter.pos, dst_iter.pos) < 0) {
- old_src = __dirent_find(&src_iter, src_dir->i_ino,
- src_name);
-
+ old_src = bch_hash_lookup_at(dirent_hash_desc,
+ &src_ei->str_hash,
+ &src_iter, src_name);
+
+ need_whiteout = bch_hash_needs_whiteout(dirent_hash_desc,
+ &src_ei->str_hash,
+ &whiteout_iter, &src_iter);
+
+ /*
+ * Note that in BCH_RENAME mode, we're _not_ checking if
+ * the target already exists - we're relying on the VFS
+ * to do that check for us for correctness:
+ */
old_dst = mode == BCH_RENAME
- ? __dirent_find_hole(&dst_iter, dst_dir->i_ino,
- dst_name)
- : __dirent_find(&dst_iter, dst_dir->i_ino,
- dst_name);
+ ? bch_hash_hole_at(dirent_hash_desc, &dst_iter)
+ : bch_hash_lookup_at(dirent_hash_desc,
+ &dst_ei->str_hash,
+ &dst_iter, dst_name);
} else {
old_dst = mode == BCH_RENAME
- ? __dirent_find_hole(&dst_iter, dst_dir->i_ino,
- dst_name)
- : __dirent_find(&dst_iter, dst_dir->i_ino,
- dst_name);
-
- old_src = __dirent_find(&src_iter, src_dir->i_ino,
- src_name);
+ ? bch_hash_hole_at(dirent_hash_desc, &dst_iter)
+ : bch_hash_lookup_at(dirent_hash_desc,
+ &dst_ei->str_hash,
+ &dst_iter, dst_name);
+
+ old_src = bch_hash_lookup_at(dirent_hash_desc,
+ &src_ei->str_hash,
+ &src_iter, src_name);
+
+ need_whiteout = bch_hash_needs_whiteout(dirent_hash_desc,
+ &src_ei->str_hash,
+ &whiteout_iter, &src_iter);
}
if (IS_ERR(old_src.k)) {
@@ -368,9 +295,9 @@ int bch_dirent_rename(struct cache_set *c,
* were going to delete:
*
* Note: this is a correctness issue, in this
- * situation dirent_needs_whiteout() could return
- * false when the whiteout would have been
- * needed if we inserted at the pos
+ * situation bch_hash_needs_whiteout() could
+ * return false when the whiteout would have
+ * been needed if we inserted at the pos
 * bch_hash_hole_at() found
*/
new_dst->k.p = src_iter.pos;
@@ -381,7 +308,7 @@ int bch_dirent_rename(struct cache_set *c,
goto insert_done;
}
- if (dirent_needs_whiteout(src_ei, &src_iter))
+ if (need_whiteout)
new_src->k.type = BCH_DIRENT_WHITEOUT;
break;
case BCH_RENAME_OVERWRITE:
@@ -392,12 +319,13 @@ int bch_dirent_rename(struct cache_set *c,
bkey_cmp(src_iter.pos, dst_iter.pos) < 0) {
/*
* Same case described above -
- * dirent_needs_whiteout could spuriously return
- * false, but we have to insert at dst_iter.pos
- * because we're overwriting another dirent:
+ * bch_hash_needs_whiteout could spuriously
+ * return false, but we have to insert at
+ * dst_iter.pos because we're overwriting
+ * another dirent:
*/
new_src->k.type = BCH_DIRENT_WHITEOUT;
- } else if (dirent_needs_whiteout(src_ei, &src_iter))
+ } else if (need_whiteout)
new_src->k.type = BCH_DIRENT_WHITEOUT;
break;
case BCH_RENAME_EXCHANGE:
@@ -417,6 +345,7 @@ int bch_dirent_rename(struct cache_set *c,
NULL, NULL, journal_seq,
BTREE_INSERT_ATOMIC);
insert_done:
+ bch_btree_iter_unlink(&whiteout_iter);
bch_btree_iter_unlock(&src_iter);
bch_btree_iter_unlock(&dst_iter);
@@ -444,38 +373,10 @@ int bch_dirent_delete(struct inode *dir, const struct qstr *name)
{
struct cache_set *c = dir->i_sb->s_fs_info;
struct bch_inode_info *ei = to_bch_ei(dir);
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_i delete;
- int ret = -ENOENT;
- bch_btree_iter_init_intent(&iter, c, BTREE_ID_DIRENTS,
- bch_dirent_pos(ei, name));
-
- do {
- k = __dirent_find(&iter, dir->i_ino, name);
- if (IS_ERR(k.k))
- return bch_btree_iter_unlock(&iter) ?: PTR_ERR(k.k);
-
- bkey_init(&delete.k);
- delete.k.p = k.k->p;
- delete.k.type = dirent_needs_whiteout(ei, &iter)
- ? BCH_DIRENT_WHITEOUT
- : KEY_TYPE_DELETED;
-
- ret = bch_btree_insert_at(&iter, &delete,
- NULL, NULL, &ei->journal_seq,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_ATOMIC);
- /*
- * XXX: if we ever cleanup whiteouts, we may need to rewind
- * iterator on -EINTR
- */
- } while (ret == -EINTR);
-
- bch_btree_iter_unlock(&iter);
-
- return ret;
+ return bch_hash_delete(dirent_hash_desc, &ei->str_hash,
+ c, ei->vfs_inode.i_ino,
+ &ei->journal_seq, name);
}
u64 bch_dirent_lookup(struct inode *dir, const struct qstr *name)
@@ -484,15 +385,16 @@ u64 bch_dirent_lookup(struct inode *dir, const struct qstr *name)
struct bch_inode_info *ei = to_bch_ei(dir);
struct btree_iter iter;
struct bkey_s_c k;
- u64 inum = 0;
+ u64 inum;
- bch_btree_iter_init(&iter, c, BTREE_ID_DIRENTS,
- bch_dirent_pos(ei, name));
-
- k = __dirent_find(&iter, dir->i_ino, name);
- if (!IS_ERR(k.k))
- inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
+ k = bch_hash_lookup(dirent_hash_desc, &ei->str_hash, c,
+ ei->vfs_inode.i_ino, &iter, name);
+ if (IS_ERR(k.k)) {
+ bch_btree_iter_unlock(&iter);
+ return 0;
+ }
+ inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
bch_btree_iter_unlock(&iter);
return inum;
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index 182e81849de7..aa8037b9b226 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -989,8 +989,8 @@ static void bch_inode_init(struct bch_inode_info *ei,
inode->i_ctime = ns_to_timespec(le64_to_cpu(bi->i_ctime));
bch_inode_flags_to_vfs(inode);
- ei->str_hash_seed = le64_to_cpu(bi->i_hash_seed);
- ei->str_hash_type = INODE_STR_HASH_TYPE(bi);
+ ei->str_hash.seed = le64_to_cpu(bi->i_hash_seed);
+ ei->str_hash.type = INODE_STR_HASH_TYPE(bi);
inode->i_mapping->a_ops = &bch_address_space_operations;
diff --git a/drivers/md/bcache/fs.h b/drivers/md/bcache/fs.h
index be172602ac98..e8f627c6ba45 100644
--- a/drivers/md/bcache/fs.h
+++ b/drivers/md/bcache/fs.h
@@ -1,6 +1,8 @@
#ifndef _BCACHE_FS_H
#define _BCACHE_FS_H
+#include "str_hash.h"
+
#include <linux/seqlock.h>
struct bch_inode_info {
@@ -21,8 +23,7 @@ struct bch_inode_info {
atomic_long_t i_sectors_dirty_count;
atomic64_t i_sectors;
- u64 str_hash_seed;
- u8 str_hash_type;
+ struct bch_hash_info str_hash;
};
#define to_bch_ei(_inode) \
diff --git a/drivers/md/bcache/str_hash.h b/drivers/md/bcache/str_hash.h
index e25862e9bc2d..08bd12c6ffe7 100644
--- a/drivers/md/bcache/str_hash.h
+++ b/drivers/md/bcache/str_hash.h
@@ -1,5 +1,9 @@
+#ifndef _BCACHE_STR_HASH_H
+#define _BCACHE_STR_HASH_H
+#include "btree_iter.h"
#include "siphash.h"
+
#include <crypto/sha1_base.h>
#include <linux/crc32c.h>
@@ -82,3 +86,271 @@ static inline u64 bch_str_hash_end(struct bch_str_hash_ctx *ctx,
BUG();
}
}
+
+struct bch_hash_info {
+ u64 seed;
+ u8 type;
+};
+
+struct bch_hash_desc {
+ enum btree_id btree_id;
+ u8 key_type;
+ u8 whiteout_type;
+
+ u64 (*hash_key)(const struct bch_hash_info *, const void *);
+ u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c);
+ bool (*cmp_key)(struct bkey_s_c, const void *);
+ bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
+};
+
+static inline struct bkey_s_c
+bch_hash_lookup_at(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct btree_iter *iter, const void *search)
+{
+ struct bkey_s_c k;
+ u64 inode = iter->pos.inode;
+
+ while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
+ if (k.k->p.inode != inode)
+ break;
+
+ if (k.k->type == desc.key_type) {
+ if (!desc.cmp_key(k, search))
+ return k;
+ } else if (k.k->type == desc.whiteout_type) {
+ ;
+ } else {
+ /* hole, not found */
+ break;
+ }
+
+ bch_btree_iter_advance_pos(iter);
+ }
+
+ return bkey_s_c_err(-ENOENT);
+}
+
+static inline struct bkey_s_c
+bch_hash_lookup_bkey_at(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct btree_iter *iter, struct bkey_s_c search)
+{
+ struct bkey_s_c k;
+ u64 inode = iter->pos.inode;
+
+ while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
+ if (k.k->p.inode != inode)
+ break;
+
+ if (k.k->type == desc.key_type) {
+ if (!desc.cmp_bkey(k, search))
+ return k;
+ } else if (k.k->type == desc.whiteout_type) {
+ ;
+ } else {
+ /* hole, not found */
+ break;
+ }
+
+ bch_btree_iter_advance_pos(iter);
+ }
+
+ return bkey_s_c_err(-ENOENT);
+}
+
+static inline struct bkey_s_c
+bch_hash_lookup(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct cache_set *c, u64 inode,
+ struct btree_iter *iter, const void *key)
+{
+ bch_btree_iter_init(iter, c, desc.btree_id,
+ POS(inode, desc.hash_key(info, key)));
+
+ return bch_hash_lookup_at(desc, info, iter, key);
+}
+
+static inline struct bkey_s_c
+bch_hash_lookup_intent(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct cache_set *c, u64 inode,
+ struct btree_iter *iter, const void *key)
+{
+ bch_btree_iter_init_intent(iter, c, desc.btree_id,
+ POS(inode, desc.hash_key(info, key)));
+
+ return bch_hash_lookup_at(desc, info, iter, key);
+}
+
+static inline struct bkey_s_c
+bch_hash_hole_at(const struct bch_hash_desc desc, struct btree_iter *iter)
+{
+ struct bkey_s_c k;
+ u64 inode = iter->pos.inode;
+
+ while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
+ if (k.k->p.inode != inode)
+ break;
+
+ if (k.k->type != desc.key_type)
+ return k;
+
+ /* hash collision, keep going */
+ bch_btree_iter_advance_pos(iter);
+ }
+
+ return bkey_s_c_err(-ENOSPC);
+}
+
+static inline struct bkey_s_c bch_hash_hole(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct cache_set *c, u64 inode,
+ struct btree_iter *iter,
+ const void *key)
+{
+ bch_btree_iter_init_intent(iter, c, desc.btree_id,
+ POS(inode, desc.hash_key(info, key)));
+
+ return bch_hash_hole_at(desc, iter);
+}
+
+static inline bool bch_hash_needs_whiteout(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct btree_iter *iter,
+ struct btree_iter *start)
+{
+ struct bkey_s_c k;
+
+ bch_btree_iter_init_copy(iter, start);
+
+ while (1) {
+ bch_btree_iter_advance_pos(iter);
+ k = bch_btree_iter_peek_with_holes(iter);
+
+ if (!k.k)
+ return iter->error ? true : false;
+
+ if (k.k->type != desc.key_type &&
+ k.k->type != desc.whiteout_type)
+ return false;
+
+ if (k.k->type == desc.key_type &&
+ desc.hash_bkey(info, k) <= start->pos.offset)
+ return true;
+ }
+}
+
+#define BCH_HASH_SET_MUST_CREATE 1
+#define BCH_HASH_SET_MUST_REPLACE 2
+
+static inline int bch_hash_set(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct cache_set *c, u64 inode,
+ u64 *journal_seq,
+ struct bkey_i *insert, int flags)
+{
+ struct btree_iter iter, hashed_slot;
+ struct bkey_s_c k;
+ int ret;
+
+ bch_btree_iter_init_intent(&hashed_slot, c, desc.btree_id,
+ POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))));
+
+ do {
+ ret = bch_btree_iter_traverse(&hashed_slot);
+ if (ret)
+ break;
+
+ /*
+ * If there's a hash collision and we have to insert at a
+ * different slot, on -EINTR (insert had to drop locks) we have
+ * to recheck the slot we hashed to - it could have been deleted
+ * while we dropped locks:
+ */
+ bch_btree_iter_init_copy(&iter, &hashed_slot);
+ k = bch_hash_lookup_bkey_at(desc, info, &iter,
+ bkey_i_to_s_c(insert));
+ if (IS_ERR(k.k)) {
+ if (flags & BCH_HASH_SET_MUST_REPLACE) {
+ ret = -ENOENT;
+ goto err_unlink;
+ }
+
+ /*
+ * Not found, so we're now looking for any open
+ * slot - we might have skipped over a whiteout
+ * that we could have used, so restart from the
+ * slot we hashed to:
+ */
+ bch_btree_iter_unlink(&iter);
+ bch_btree_iter_init_copy(&iter, &hashed_slot);
+
+ k = bch_hash_hole_at(desc, &iter);
+ if (IS_ERR(k.k)) {
+ ret = PTR_ERR(k.k);
+ goto err_unlink;
+ }
+ } else {
+ if (flags & BCH_HASH_SET_MUST_CREATE) {
+ ret = -EEXIST;
+ goto err_unlink;
+ }
+ }
+
+ insert->k.p = iter.pos;
+ ret = bch_btree_insert_at(&iter, insert, NULL, NULL,
+ journal_seq, BTREE_INSERT_ATOMIC);
+err_unlink:
+ ret = bch_btree_iter_unlink(&iter) ?: ret;
+ } while (ret == -EINTR);
+
+ bch_btree_iter_unlock(&hashed_slot);
+ return ret;
+}
+
+static inline int bch_hash_delete(const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct cache_set *c, u64 inode,
+ u64 *journal_seq, const void *key)
+{
+ struct btree_iter iter, whiteout_iter;
+ struct bkey_s_c k;
+ struct bkey_i delete;
+ int ret = -ENOENT;
+
+ bch_btree_iter_init_intent(&iter, c, desc.btree_id,
+ POS(inode, desc.hash_key(info, key)));
+
+ do {
+ k = bch_hash_lookup_at(desc, info, &iter, key);
+ if (IS_ERR(k.k))
+ return bch_btree_iter_unlock(&iter) ?: -ENOENT;
+
+ bkey_init(&delete.k);
+ delete.k.p = k.k->p;
+ delete.k.type = bch_hash_needs_whiteout(desc, info,
+ &whiteout_iter, &iter)
+ ? desc.whiteout_type
+ : KEY_TYPE_DELETED;
+
+ ret = bch_btree_insert_at(&iter, &delete,
+ NULL, NULL, journal_seq,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_ATOMIC);
+ /*
+ * Need to hold whiteout iter locked while we do the delete, if
+ * we're not leaving a whiteout:
+ */
+ bch_btree_iter_unlink(&whiteout_iter);
+ /*
+ * XXX: if we ever cleanup whiteouts, we may need to rewind
+ * iterator on -EINTR
+ */
+ } while (ret == -EINTR);
+
+ bch_btree_iter_unlock(&iter);
+ return ret;
+}
+
+#endif /* _BCACHE_STR_HASH_H */
diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c
index 3bfe616d817e..bfaf2875e229 100644
--- a/drivers/md/bcache/xattr.c
+++ b/drivers/md/bcache/xattr.c
@@ -4,39 +4,79 @@
#include "btree_update.h"
#include "extents.h"
#include "fs.h"
-#include "keylist.h"
#include "str_hash.h"
#include "xattr.h"
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
-static struct bpos bch_xattr_pos(struct bch_inode_info *ei,
- const struct qstr *name, u8 type)
+struct xattr_search_key {
+ u8 type;
+ struct qstr name;
+};
+
+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \
+ { .type = _type, .name = QSTR_INIT(_name, _len) })
+
+static u64 bch_xattr_hash(const struct bch_hash_info *info,
+ const struct xattr_search_key *key)
{
struct bch_str_hash_ctx ctx;
- bch_str_hash_init(&ctx, ei->str_hash_type);
+ bch_str_hash_init(&ctx, info->type);
+ bch_str_hash_update(&ctx, info->type, &info->seed, sizeof(info->seed));
- bch_str_hash_update(&ctx, ei->str_hash_type,
- &ei->str_hash_seed, sizeof(ei->str_hash_seed));
- bch_str_hash_update(&ctx, ei->str_hash_type, &type, sizeof(type));
- bch_str_hash_update(&ctx, ei->str_hash_type, name->name, name->len);
+ bch_str_hash_update(&ctx, info->type, &key->type, sizeof(key->type));
+ bch_str_hash_update(&ctx, info->type, key->name.name, key->name.len);
- return POS(ei->vfs_inode.i_ino,
- bch_str_hash_end(&ctx, ei->str_hash_type));
+ return bch_str_hash_end(&ctx, info->type);
}
#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len)
-static int xattr_cmp(const struct bch_xattr *xattr,
- u8 type, const struct qstr *q)
+static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
+{
+ return bch_xattr_hash(info, key);
+}
+
+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
{
- return xattr->x_type != type ||
- xattr->x_name_len != q->len ||
- memcmp(xattr->x_name, q->name, q->len);
+ struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
+
+ return bch_xattr_hash(info,
+ &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
+}
+
+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
+{
+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
+ const struct xattr_search_key *r = _r;
+
+ return l.v->x_type != r->type ||
+ l.v->x_name_len != r->name.len ||
+ memcmp(l.v->x_name, r->name.name, r->name.len);
}
+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
+{
+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
+ struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r);
+
+ return l.v->x_type != r.v->x_type ||
+ l.v->x_name_len != r.v->x_name_len ||
+ memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
+}
+
+static const struct bch_hash_desc xattr_hash_desc = {
+ .btree_id = BTREE_ID_XATTRS,
+ .key_type = BCH_XATTR,
+ .whiteout_type = BCH_XATTR_WHITEOUT,
+ .hash_key = xattr_hash_key,
+ .hash_bkey = xattr_hash_bkey,
+ .cmp_key = xattr_cmp_key,
+ .cmp_bkey = xattr_cmp_bkey,
+};
+
static const char *bch_xattr_invalid(const struct cache_set *c,
struct bkey_s_c k)
{
@@ -107,41 +147,26 @@ int bch_xattr_get(struct inode *inode, const char *name,
{
struct cache_set *c = inode->i_sb->s_fs_info;
struct bch_inode_info *ei = to_bch_ei(inode);
- struct qstr qname = (struct qstr) QSTR_INIT(name, strlen(name));
struct btree_iter iter;
struct bkey_s_c k;
- const struct bch_xattr *xattr;
- int ret = -ENODATA;
-
- for_each_btree_key_with_holes(&iter, c, BTREE_ID_XATTRS,
- bch_xattr_pos(ei, &qname, type), k) {
- switch (k.k->type) {
- case BCH_XATTR:
- xattr = bkey_s_c_to_xattr(k).v;
-
- /* collision? */
- if (!xattr_cmp(xattr, type, &qname)) {
- unsigned len = le16_to_cpu(xattr->x_val_len);
-
- ret = len;
- if (buffer) {
- if (len > size)
- ret = -ERANGE;
- else
- memcpy(buffer, xattr_val(xattr),
- len);
- }
- goto out;
- }
- break;
- case BCH_XATTR_WHITEOUT:
- break;
- default:
- /* hole, not found */
- goto out;
- }
+ struct bkey_s_c_xattr xattr;
+ int ret;
+
+ k = bch_hash_lookup(xattr_hash_desc, &ei->str_hash, c,
+ ei->vfs_inode.i_ino, &iter,
+ &X_SEARCH(type, name, strlen(name)));
+ if (IS_ERR(k.k))
+ return bch_btree_iter_unlock(&iter) ?: -ENODATA;
+
+ xattr = bkey_s_c_to_xattr(k);
+ ret = le16_to_cpu(xattr.v->x_val_len);
+ if (buffer) {
+ if (ret > size)
+ ret = -ERANGE;
+ else
+ memcpy(buffer, xattr_val(xattr.v), ret);
}
-out:
+
bch_btree_iter_unlock(&iter);
return ret;
}
@@ -152,100 +177,46 @@ int bch_xattr_set(struct inode *inode, const char *name,
{
struct bch_inode_info *ei = to_bch_ei(inode);
struct cache_set *c = inode->i_sb->s_fs_info;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct qstr qname = (struct qstr) QSTR_INIT((char *) name,
- strlen(name));
- int ret = -EINVAL;
- unsigned insert_flags = BTREE_INSERT_ATOMIC;
-
- if (!value)
- insert_flags |= BTREE_INSERT_NOFAIL;
-
- bch_btree_iter_init_intent(&iter, c, BTREE_ID_XATTRS,
- bch_xattr_pos(ei, &qname, type));
-
- while ((k = bch_btree_iter_peek_with_holes(&iter)).k) {
- switch (k.k->type) {
- case BCH_XATTR:
- /* collision? */
- if (xattr_cmp(bkey_s_c_to_xattr(k).v, type, &qname)) {
- bch_btree_iter_advance_pos(&iter);
- continue;
- }
-
- if (flags & XATTR_CREATE) {
- ret = -EEXIST;
- goto out;
- }
-
- break;
- case BCH_XATTR_WHITEOUT:
- bch_btree_iter_advance_pos(&iter);
- continue;
- default:
- /* hole, not found */
- if (flags & XATTR_REPLACE) {
- ret = -ENODATA;
- goto out;
- }
- break;
- }
-
- if (value) {
- struct keylist keys;
- struct bkey_i_xattr *xattr;
- unsigned u64s = BKEY_U64s +
- DIV_ROUND_UP(sizeof(struct bch_xattr) +
- qname.len + size,
- sizeof(u64));
-
- if (u64s > U8_MAX) {
- ret = -ERANGE;
- break;
- }
+ struct xattr_search_key search = X_SEARCH(type, name, strlen(name));
+ int ret;
- bch_keylist_init(&keys, NULL, 0);
-
- if (bch_keylist_realloc(&keys, u64s)) {
- ret = -ENOMEM;
- break;
- }
+ if (!value) {
+ ret = bch_hash_delete(xattr_hash_desc, &ei->str_hash,
+ c, ei->vfs_inode.i_ino,
+ &ei->journal_seq, &search);
+ } else {
+ struct bkey_i_xattr *xattr;
+ unsigned u64s = BKEY_U64s +
+ DIV_ROUND_UP(sizeof(struct bch_xattr) +
+ search.name.len + size,
+ sizeof(u64));
+
+ if (u64s > U8_MAX)
+ return -ERANGE;
+
+ xattr = kmalloc(u64s * sizeof(u64), GFP_NOFS);
+ if (!xattr)
+ return -ENOMEM;
+
+ bkey_xattr_init(&xattr->k_i);
+ xattr->k.u64s = u64s;
+ xattr->v.x_type = type;
+ xattr->v.x_name_len = search.name.len;
+ xattr->v.x_val_len = cpu_to_le16(size);
+ memcpy(xattr->v.x_name, search.name.name, search.name.len);
+ memcpy(xattr_val(&xattr->v), value, size);
+
+ ret = bch_hash_set(xattr_hash_desc, &ei->str_hash, c,
+ ei->vfs_inode.i_ino, &ei->journal_seq,
+ &xattr->k_i,
+ (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
+ (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
+ kfree(xattr);
+ }
- xattr = bkey_xattr_init(keys.top);
- xattr->k.u64s = u64s;
- xattr->k.p = k.k->p;
- xattr->v.x_type = type;
- xattr->v.x_name_len = qname.len;
- xattr->v.x_val_len = cpu_to_le16(size);
- memcpy(xattr->v.x_name, qname.name, qname.len);
- memcpy(xattr_val(&xattr->v), value, size);
-
- BUG_ON(xattr_cmp(&xattr->v, type, &qname));
-
- bch_keylist_enqueue(&keys);
-
- ret = bch_btree_insert_at(&iter, &xattr->k_i, NULL, NULL,
- &ei->journal_seq,
- insert_flags);
- bch_keylist_free(&keys);
- } else {
- struct bkey_i whiteout;
- /* removing */
- bkey_init(&whiteout.k);
- whiteout.k.type = BCH_XATTR_WHITEOUT;
- whiteout.k.p = k.k->p;
-
- ret = bch_btree_insert_at(&iter, &whiteout,
- NULL, NULL, &ei->journal_seq,
- insert_flags);
- }
+ if (ret == -ENOENT)
+ ret = flags & XATTR_REPLACE ? -ENODATA : 0;
- if (ret != -EINTR)
- break;
- }
-out:
- bch_btree_iter_unlock(&iter);
return ret;
}