diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-05-24 15:53:15 -0800 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-01-18 21:39:32 -0900 |
commit | 5818a280d149df79f8ae76e48646c45b4dbb2f5d (patch) | |
tree | d6e0b73a400d2f7e4045b98e48a44038fefe06e2 | |
parent | 60f5b5a0e67e6ab36955a13f0e429852299fbb99 (diff) |
bcache: btree as hash table improvements
-rw-r--r-- | drivers/md/bcache/bkey.h | 7 | ||||
-rw-r--r-- | drivers/md/bcache/dirent.c | 320 | ||||
-rw-r--r-- | drivers/md/bcache/fs.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/fs.h | 5 | ||||
-rw-r--r-- | drivers/md/bcache/str_hash.h | 272 | ||||
-rw-r--r-- | drivers/md/bcache/xattr.c | 247 |
6 files changed, 502 insertions, 353 deletions
diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h index 08b805408d8f..881c5ebea4f3 100644 --- a/drivers/md/bcache/bkey.h +++ b/drivers/md/bcache/bkey.h @@ -329,8 +329,11 @@ static inline void bkey_reassemble(struct bkey_i *dst, memcpy(&dst->v, src.v, bkey_val_bytes(src.k)); } -#define bkey_s_null ((struct bkey_s) { .k = NULL, .v = NULL }) -#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL, .v = NULL }) +#define bkey_s_null ((struct bkey_s) { .k = NULL }) +#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL }) + +#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) }) +#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) }) static inline struct bkey_s bkey_to_s(struct bkey *k) { diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c index 40d49f8593f3..6b323912e810 100644 --- a/drivers/md/bcache/dirent.c +++ b/drivers/md/bcache/dirent.c @@ -8,43 +8,72 @@ #include "keylist.h" #include "str_hash.h" -static struct bpos bch_dirent_pos(struct bch_inode_info *ei, - const struct qstr *name) +static unsigned dirent_name_bytes(struct bkey_s_c_dirent d) { - struct bch_str_hash_ctx ctx; - u64 hash; + unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent); - bch_str_hash_init(&ctx, ei->str_hash_type); + while (len && !d.v->d_name[len - 1]) + --len; + + return len; +} + +static u64 bch_dirent_hash(const struct bch_hash_info *info, + const struct qstr *name) +{ + struct bch_str_hash_ctx ctx; - bch_str_hash_update(&ctx, ei->str_hash_type, - &ei->str_hash_seed, sizeof(ei->str_hash_seed)); - bch_str_hash_update(&ctx, ei->str_hash_type, name->name, name->len); + bch_str_hash_init(&ctx, info->type); + bch_str_hash_update(&ctx, info->type, &info->seed, sizeof(info->seed)); - hash = bch_str_hash_end(&ctx, ei->str_hash_type); + bch_str_hash_update(&ctx, info->type, name->name, name->len); /* [0,2) reserved for dots */ + return max_t(u64, bch_str_hash_end(&ctx, info->type), 2); +} - return POS(ei->vfs_inode.i_ino, hash >= 2 ? hash : 2); +static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) +{ + return bch_dirent_hash(info, key); } -static unsigned dirent_name_bytes(struct bkey_s_c_dirent d) +static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) { - unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent); + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + struct qstr name = QSTR_INIT(d.v->d_name, dirent_name_bytes(d)); - while (len && !d.v->d_name[len - 1]) - --len; + return bch_dirent_hash(info, &name); +} - return len; +static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) +{ + struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); + int len = dirent_name_bytes(l); + const struct qstr *r = _r; + + return len - r->len ?: memcmp(l.v->d_name, r->name, len); } -static int dirent_cmp(struct bkey_s_c_dirent d, - const struct qstr *q) +static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) { - int len = dirent_name_bytes(d); + struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); + struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); + int l_len = dirent_name_bytes(l); + int r_len = dirent_name_bytes(r); - return len - q->len ?: memcmp(d.v->d_name, q->name, len); + return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); } +static const struct bch_hash_desc dirent_hash_desc = { + .btree_id = BTREE_ID_DIRENTS, + .key_type = BCH_DIRENT, + .whiteout_type = BCH_DIRENT_WHITEOUT, + .hash_key = dirent_hash_key, + .hash_bkey = dirent_hash_bkey, + .cmp_key = dirent_cmp_key, + .cmp_bkey = dirent_cmp_bkey, +}; + static const char *bch_dirent_invalid(const struct cache_set *c, struct bkey_s_c k) { @@ -98,60 +127,15 @@ const struct bkey_ops bch_bkey_dirent_ops = { .val_to_text = bch_dirent_to_text, }; -static bool dirent_needs_whiteout(struct bch_inode_info *ei, - struct btree_iter *iter) -{ - struct btree *b = iter->nodes[0]; - /* - * hack: we don't want to advance @iter, because caller is about to - * insert at @iter's current position and we can't just rewind it - so, - * just copy the node iter - */ - struct btree_node_iter node_iter = iter->node_iters[0]; - struct bkey_s_c k; - struct bkey u; - struct bpos cur_pos = iter->pos; - - bch_btree_node_iter_advance(&node_iter, &b->keys); - cur_pos = bkey_successor(cur_pos); - - while ((k = bch_btree_node_iter_next_unpack(&node_iter, &b->keys, &u)).k && - k.k->p.inode == iter->pos.inode && - !bkey_cmp(k.k->p, cur_pos)) { - if (k.k->type == BCH_DIRENT) { - struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - struct qstr name = { - .name = d.v->d_name, - .len = dirent_name_bytes(d), - }; - - if (bkey_cmp(bch_dirent_pos(ei, &name), iter->pos) <= 0) - return true; - } - - cur_pos = bkey_successor(cur_pos); - } - - /* - * end of node, can't (yet) check the next one (would have to keep it - * locked while we do the deletion) - */ - if (bkey_cmp(cur_pos, b->key.k.p) > 0) - return true; - - return false; -} - static struct bkey_i_dirent *dirent_create_key(u8 type, - const struct qstr *name, - u64 dst) + const struct qstr *name, u64 dst) { struct bkey_i_dirent *dirent; unsigned u64s = BKEY_U64s + DIV_ROUND_UP(sizeof(struct bch_dirent) + name->len, sizeof(u64)); - dirent = kmalloc(u64s * sizeof(u64), GFP_KERNEL); + dirent = kmalloc(u64s * sizeof(u64), GFP_NOFS); if (!dirent) return NULL; @@ -166,74 +150,15 @@ static struct bkey_i_dirent *dirent_create_key(u8 type, (sizeof(struct bch_dirent) + name->len)); EBUG_ON(dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); - EBUG_ON(dirent_cmp(dirent_i_to_s_c(dirent), name)); return dirent; } -static struct bkey_s_c __dirent_find(struct btree_iter *iter, - u64 dir, const struct qstr *name) -{ - struct bkey_s_c k; - - while ((k = bch_btree_iter_peek_with_holes(iter)).k) { - if (k.k->p.inode != dir) - break; - - switch (k.k->type) { - case BCH_DIRENT: - if (!dirent_cmp(bkey_s_c_to_dirent(k), name)) - return k; - - /* hash collision, keep going */ - break; - case BCH_DIRENT_WHITEOUT: - /* hash collision, keep going */ - break; - default: - /* hole, not found */ - goto not_found; - } - - bch_btree_iter_advance_pos(iter); - } -not_found: - return (struct bkey_s_c) { .k = ERR_PTR(-ENOENT) }; -} - -static struct bkey_s_c __dirent_find_hole(struct btree_iter *iter, - u64 dir, const struct qstr *name) -{ - struct bkey_s_c k; - - while ((k = bch_btree_iter_peek_with_holes(iter)).k) { - if (k.k->p.inode != dir) - break; - - switch (k.k->type) { - case BCH_DIRENT: - if (!dirent_cmp(bkey_s_c_to_dirent(k), name)) - return (struct bkey_s_c) { .k = ERR_PTR(-EEXIST) }; - - /* hash collision, keep going */ - break; - default: - return k; - } - - bch_btree_iter_advance_pos(iter); - } - - return (struct bkey_s_c) { .k = ERR_PTR(-ENOSPC) }; -} - int bch_dirent_create(struct inode *dir, u8 type, const struct qstr *name, u64 dst_inum) { struct cache_set *c = dir->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(dir); - struct btree_iter iter; - struct bkey_s_c k; struct bkey_i_dirent *dirent; int ret; @@ -241,28 +166,9 @@ int bch_dirent_create(struct inode *dir, u8 type, if (!dirent) return -ENOMEM; - bch_btree_iter_init_intent(&iter, c, BTREE_ID_DIRENTS, - bch_dirent_pos(ei, name)); - - do { - k = __dirent_find_hole(&iter, dir->i_ino, name); - if (IS_ERR(k.k)) { - ret = bch_btree_iter_unlock(&iter) ?: PTR_ERR(k.k); - break; - } - - dirent->k.p = k.k->p; - - ret = bch_btree_insert_at(&iter, &dirent->k_i, - NULL, NULL, &ei->journal_seq, - BTREE_INSERT_ATOMIC); - /* - * XXX: if we ever cleanup whiteouts, we may need to rewind - * iterator on -EINTR - */ - } while (ret == -EINTR); - - bch_btree_iter_unlock(&iter); + ret = bch_hash_set(dirent_hash_desc, &ei->str_hash, c, + ei->vfs_inode.i_ino, &ei->journal_seq, + &dirent->k_i, 0); kfree(dirent); return ret; @@ -275,6 +181,12 @@ static void dirent_copy_target(struct bkey_i_dirent *dst, dst->v.d_type = src.v->d_type; } +static struct bpos bch_dirent_pos(struct bch_inode_info *ei, + const struct qstr *name) +{ + return POS(ei->vfs_inode.i_ino, bch_dirent_hash(&ei->str_hash, name)); +} + int bch_dirent_rename(struct cache_set *c, struct inode *src_dir, const struct qstr *src_name, struct inode *dst_dir, const struct qstr *dst_name, @@ -282,13 +194,13 @@ int bch_dirent_rename(struct cache_set *c, { struct bch_inode_info *src_ei = to_bch_ei(src_dir); struct bch_inode_info *dst_ei = to_bch_ei(dst_dir); - struct btree_iter src_iter; - struct btree_iter dst_iter; + struct btree_iter src_iter, dst_iter, whiteout_iter; struct bkey_s_c old_src, old_dst; struct bkey delete; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; struct bpos src_pos = bch_dirent_pos(src_ei, src_name); struct bpos dst_pos = bch_dirent_pos(dst_ei, dst_name); + bool need_whiteout; int ret = -ENOMEM; if (mode == BCH_RENAME_EXCHANGE) { @@ -323,23 +235,38 @@ int bch_dirent_rename(struct cache_set *c, * lock ordering. */ if (bkey_cmp(src_iter.pos, dst_iter.pos) < 0) { - old_src = __dirent_find(&src_iter, src_dir->i_ino, - src_name); - + old_src = bch_hash_lookup_at(dirent_hash_desc, + &src_ei->str_hash, + &src_iter, src_name); + + need_whiteout = bch_hash_needs_whiteout(dirent_hash_desc, + &src_ei->str_hash, + &whiteout_iter, &src_iter); + + /* + * Note that in BCH_RENAME mode, we're _not_ checking if + * the target already exists - we're relying on the VFS + * to do that check for us for correctness: + */ old_dst = mode == BCH_RENAME - ? __dirent_find_hole(&dst_iter, dst_dir->i_ino, - dst_name) - : __dirent_find(&dst_iter, dst_dir->i_ino, - dst_name); + ? bch_hash_hole_at(dirent_hash_desc, &dst_iter) + : bch_hash_lookup_at(dirent_hash_desc, + &dst_ei->str_hash, + &dst_iter, dst_name); } else { old_dst = mode == BCH_RENAME - ? __dirent_find_hole(&dst_iter, dst_dir->i_ino, - dst_name) - : __dirent_find(&dst_iter, dst_dir->i_ino, - dst_name); - - old_src = __dirent_find(&src_iter, src_dir->i_ino, - src_name); + ? bch_hash_hole_at(dirent_hash_desc, &dst_iter) + : bch_hash_lookup_at(dirent_hash_desc, + &dst_ei->str_hash, + &dst_iter, dst_name); + + old_src = bch_hash_lookup_at(dirent_hash_desc, + &src_ei->str_hash, + &src_iter, src_name); + + need_whiteout = bch_hash_needs_whiteout(dirent_hash_desc, + &src_ei->str_hash, + &whiteout_iter, &src_iter); } if (IS_ERR(old_src.k)) { @@ -368,9 +295,9 @@ int bch_dirent_rename(struct cache_set *c, * were going to delete: * * Note: this is a correctness issue, in this - * situation dirent_needs_whiteout() could return - * false when the whiteout would have been - * needed if we inserted at the pos + * situation bch_hash_needs_whiteout() could + * return false when the whiteout would have + * been needed if we inserted at the pos * __dirent_find_hole() found */ new_dst->k.p = src_iter.pos; @@ -381,7 +308,7 @@ int bch_dirent_rename(struct cache_set *c, goto insert_done; } - if (dirent_needs_whiteout(src_ei, &src_iter)) + if (need_whiteout) new_src->k.type = BCH_DIRENT_WHITEOUT; break; case BCH_RENAME_OVERWRITE: @@ -392,12 +319,13 @@ int bch_dirent_rename(struct cache_set *c, bkey_cmp(src_iter.pos, dst_iter.pos) < 0) { /* * Same case described above - - * dirent_needs_whiteout could spuriously return - * false, but we have to insert at dst_iter.pos - * because we're overwriting another dirent: + * bch_hash_needs_whiteout could spuriously + * return false, but we have to insert at + * dst_iter.pos because we're overwriting + * another dirent: */ new_src->k.type = BCH_DIRENT_WHITEOUT; - } else if (dirent_needs_whiteout(src_ei, &src_iter)) + } else if (need_whiteout) new_src->k.type = BCH_DIRENT_WHITEOUT; break; case BCH_RENAME_EXCHANGE: @@ -417,6 +345,7 @@ int bch_dirent_rename(struct cache_set *c, NULL, NULL, journal_seq, BTREE_INSERT_ATOMIC); insert_done: + bch_btree_iter_unlink(&whiteout_iter); bch_btree_iter_unlock(&src_iter); bch_btree_iter_unlock(&dst_iter); @@ -444,38 +373,10 @@ int bch_dirent_delete(struct inode *dir, const struct qstr *name) { struct cache_set *c = dir->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(dir); - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i delete; - int ret = -ENOENT; - bch_btree_iter_init_intent(&iter, c, BTREE_ID_DIRENTS, - bch_dirent_pos(ei, name)); - - do { - k = __dirent_find(&iter, dir->i_ino, name); - if (IS_ERR(k.k)) - return bch_btree_iter_unlock(&iter) ?: PTR_ERR(k.k); - - bkey_init(&delete.k); - delete.k.p = k.k->p; - delete.k.type = dirent_needs_whiteout(ei, &iter) - ? BCH_DIRENT_WHITEOUT - : KEY_TYPE_DELETED; - - ret = bch_btree_insert_at(&iter, &delete, - NULL, NULL, &ei->journal_seq, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_ATOMIC); - /* - * XXX: if we ever cleanup whiteouts, we may need to rewind - * iterator on -EINTR - */ - } while (ret == -EINTR); - - bch_btree_iter_unlock(&iter); - - return ret; + return bch_hash_delete(dirent_hash_desc, &ei->str_hash, + c, ei->vfs_inode.i_ino, + &ei->journal_seq, name); } u64 bch_dirent_lookup(struct inode *dir, const struct qstr *name) @@ -484,15 +385,16 @@ u64 bch_dirent_lookup(struct inode *dir, const struct qstr *name) struct bch_inode_info *ei = to_bch_ei(dir); struct btree_iter iter; struct bkey_s_c k; - u64 inum = 0; + u64 inum; - bch_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, - bch_dirent_pos(ei, name)); - - k = __dirent_find(&iter, dir->i_ino, name); - if (!IS_ERR(k.k)) - inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); + k = bch_hash_lookup(dirent_hash_desc, &ei->str_hash, c, + ei->vfs_inode.i_ino, &iter, name); + if (IS_ERR(k.k)) { + bch_btree_iter_unlock(&iter); + return 0; + } + inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); bch_btree_iter_unlock(&iter); return inum; diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c index 182e81849de7..aa8037b9b226 100644 --- a/drivers/md/bcache/fs.c +++ b/drivers/md/bcache/fs.c @@ -989,8 +989,8 @@ static void bch_inode_init(struct bch_inode_info *ei, inode->i_ctime = ns_to_timespec(le64_to_cpu(bi->i_ctime)); bch_inode_flags_to_vfs(inode); - ei->str_hash_seed = le64_to_cpu(bi->i_hash_seed); - ei->str_hash_type = INODE_STR_HASH_TYPE(bi); + ei->str_hash.seed = le64_to_cpu(bi->i_hash_seed); + ei->str_hash.type = INODE_STR_HASH_TYPE(bi); inode->i_mapping->a_ops = &bch_address_space_operations; diff --git a/drivers/md/bcache/fs.h b/drivers/md/bcache/fs.h index be172602ac98..e8f627c6ba45 100644 --- a/drivers/md/bcache/fs.h +++ b/drivers/md/bcache/fs.h @@ -1,6 +1,8 @@ #ifndef _BCACHE_FS_H #define _BCACHE_FS_H +#include "str_hash.h" + #include <linux/seqlock.h> struct bch_inode_info { @@ -21,8 +23,7 @@ struct bch_inode_info { atomic_long_t i_sectors_dirty_count; atomic64_t i_sectors; - u64 str_hash_seed; - u8 str_hash_type; + struct bch_hash_info str_hash; }; #define to_bch_ei(_inode) \ diff --git a/drivers/md/bcache/str_hash.h b/drivers/md/bcache/str_hash.h index e25862e9bc2d..08bd12c6ffe7 100644 --- a/drivers/md/bcache/str_hash.h +++ b/drivers/md/bcache/str_hash.h @@ -1,5 +1,9 @@ +#ifndef _BCACHE_STR_HASH_H +#define _BCACHE_STR_HASH_H +#include "btree_iter.h" #include "siphash.h" + #include <crypto/sha1_base.h> #include <linux/crc32c.h> @@ -82,3 +86,271 @@ static inline u64 bch_str_hash_end(struct bch_str_hash_ctx *ctx, BUG(); } } + +struct bch_hash_info { + u64 seed; + u8 type; +}; + +struct bch_hash_desc { + enum btree_id btree_id; + u8 key_type; + u8 whiteout_type; + + u64 (*hash_key)(const struct bch_hash_info *, const void *); + u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); + bool (*cmp_key)(struct bkey_s_c, const void *); + bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); +}; + +static inline struct bkey_s_c +bch_hash_lookup_at(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct btree_iter *iter, const void *search) +{ + struct bkey_s_c k; + u64 inode = iter->pos.inode; + + while ((k = bch_btree_iter_peek_with_holes(iter)).k) { + if (k.k->p.inode != inode) + break; + + if (k.k->type == desc.key_type) { + if (!desc.cmp_key(k, search)) + return k; + } else if (k.k->type == desc.whiteout_type) { + ; + } else { + /* hole, not found */ + break; + } + + bch_btree_iter_advance_pos(iter); + } + + return bkey_s_c_err(-ENOENT); +} + +static inline struct bkey_s_c +bch_hash_lookup_bkey_at(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct btree_iter *iter, struct bkey_s_c search) +{ + struct bkey_s_c k; + u64 inode = iter->pos.inode; + + while ((k = bch_btree_iter_peek_with_holes(iter)).k) { + if (k.k->p.inode != inode) + break; + + if (k.k->type == desc.key_type) { + if (!desc.cmp_bkey(k, search)) + return k; + } else if (k.k->type == desc.whiteout_type) { + ; + } else { + /* hole, not found */ + break; + } + + bch_btree_iter_advance_pos(iter); + } + + return bkey_s_c_err(-ENOENT); +} + +static inline struct bkey_s_c +bch_hash_lookup(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct cache_set *c, u64 inode, + struct btree_iter *iter, const void *key) +{ + bch_btree_iter_init(iter, c, desc.btree_id, + POS(inode, desc.hash_key(info, key))); + + return bch_hash_lookup_at(desc, info, iter, key); +} + +static inline struct bkey_s_c +bch_hash_lookup_intent(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct cache_set *c, u64 inode, + struct btree_iter *iter, const void *key) +{ + bch_btree_iter_init_intent(iter, c, desc.btree_id, + POS(inode, desc.hash_key(info, key))); + + return bch_hash_lookup_at(desc, info, iter, key); +} + +static inline struct bkey_s_c +bch_hash_hole_at(const struct bch_hash_desc desc, struct btree_iter *iter) +{ + struct bkey_s_c k; + u64 inode = iter->pos.inode; + + while ((k = bch_btree_iter_peek_with_holes(iter)).k) { + if (k.k->p.inode != inode) + break; + + if (k.k->type != desc.key_type) + return k; + + /* hash collision, keep going */ + bch_btree_iter_advance_pos(iter); + } + + return bkey_s_c_err(-ENOSPC); +} + +static inline struct bkey_s_c bch_hash_hole(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct cache_set *c, u64 inode, + struct btree_iter *iter, + const void *key) +{ + bch_btree_iter_init_intent(iter, c, desc.btree_id, + POS(inode, desc.hash_key(info, key))); + + return bch_hash_hole_at(desc, iter); +} + +static inline bool bch_hash_needs_whiteout(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct btree_iter *iter, + struct btree_iter *start) +{ + struct bkey_s_c k; + + bch_btree_iter_init_copy(iter, start); + + while (1) { + bch_btree_iter_advance_pos(iter); + k = bch_btree_iter_peek_with_holes(iter); + + if (!k.k) + return iter->error ? true : false; + + if (k.k->type != desc.key_type && + k.k->type != desc.whiteout_type) + return false; + + if (k.k->type == desc.key_type && + desc.hash_bkey(info, k) <= start->pos.offset) + return true; + } +} + +#define BCH_HASH_SET_MUST_CREATE 1 +#define BCH_HASH_SET_MUST_REPLACE 2 + +static inline int bch_hash_set(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct cache_set *c, u64 inode, + u64 *journal_seq, + struct bkey_i *insert, int flags) +{ + struct btree_iter iter, hashed_slot; + struct bkey_s_c k; + int ret; + + bch_btree_iter_init_intent(&hashed_slot, c, desc.btree_id, + POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert)))); + + do { + ret = bch_btree_iter_traverse(&hashed_slot); + if (ret) + break; + + /* + * If there's a hash collision and we have to insert at a + * different slot, on -EINTR (insert had to drop locks) we have + * to recheck the slot we hashed to - it could have been deleted + * while we dropped locks: + */ + bch_btree_iter_init_copy(&iter, &hashed_slot); + k = bch_hash_lookup_bkey_at(desc, info, &iter, + bkey_i_to_s_c(insert)); + if (IS_ERR(k.k)) { + if (flags & BCH_HASH_SET_MUST_REPLACE) { + ret = -ENOENT; + goto err_unlink; + } + + /* + * Not found, so we're now looking for any open + * slot - we might have skipped over a whiteout + * that we could have used, so restart from the + * slot we hashed to: + */ + bch_btree_iter_unlink(&iter); + bch_btree_iter_init_copy(&iter, &hashed_slot); + + k = bch_hash_hole_at(desc, &iter); + if (IS_ERR(k.k)) { + ret = PTR_ERR(k.k); + goto err_unlink; + } + } else { + if (flags & BCH_HASH_SET_MUST_CREATE) { + ret = -EEXIST; + goto err_unlink; + } + } + + insert->k.p = iter.pos; + ret = bch_btree_insert_at(&iter, insert, NULL, NULL, + journal_seq, BTREE_INSERT_ATOMIC); +err_unlink: + ret = bch_btree_iter_unlink(&iter) ?: ret; + } while (ret == -EINTR); + + bch_btree_iter_unlock(&hashed_slot); + return ret; +} + +static inline int bch_hash_delete(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct cache_set *c, u64 inode, + u64 *journal_seq, const void *key) +{ + struct btree_iter iter, whiteout_iter; + struct bkey_s_c k; + struct bkey_i delete; + int ret = -ENOENT; + + bch_btree_iter_init_intent(&iter, c, desc.btree_id, + POS(inode, desc.hash_key(info, key))); + + do { + k = bch_hash_lookup_at(desc, info, &iter, key); + if (IS_ERR(k.k)) + return bch_btree_iter_unlock(&iter) ?: -ENOENT; + + bkey_init(&delete.k); + delete.k.p = k.k->p; + delete.k.type = bch_hash_needs_whiteout(desc, info, + &whiteout_iter, &iter) + ? desc.whiteout_type + : KEY_TYPE_DELETED; + + ret = bch_btree_insert_at(&iter, &delete, + NULL, NULL, journal_seq, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_ATOMIC); + /* + * Need to hold whiteout iter locked while we do the delete, if + * we're not leaving a whiteout: + */ + bch_btree_iter_unlink(&whiteout_iter); + /* + * XXX: if we ever cleanup whiteouts, we may need to rewind + * iterator on -EINTR + */ + } while (ret == -EINTR); + + bch_btree_iter_unlock(&iter); + return ret; +} + +#endif /* _BCACHE_STR_HASH_H */ diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c index 3bfe616d817e..bfaf2875e229 100644 --- a/drivers/md/bcache/xattr.c +++ b/drivers/md/bcache/xattr.c @@ -4,39 +4,79 @@ #include "btree_update.h" #include "extents.h" #include "fs.h" -#include "keylist.h" #include "str_hash.h" #include "xattr.h" #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> -static struct bpos bch_xattr_pos(struct bch_inode_info *ei, - const struct qstr *name, u8 type) +struct xattr_search_key { + u8 type; + struct qstr name; +}; + +#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \ + { .type = _type, .name = QSTR_INIT(_name, _len) }) + +static u64 bch_xattr_hash(const struct bch_hash_info *info, + const struct xattr_search_key *key) { struct bch_str_hash_ctx ctx; - bch_str_hash_init(&ctx, ei->str_hash_type); + bch_str_hash_init(&ctx, info->type); + bch_str_hash_update(&ctx, info->type, &info->seed, sizeof(info->seed)); - bch_str_hash_update(&ctx, ei->str_hash_type, - &ei->str_hash_seed, sizeof(ei->str_hash_seed)); - bch_str_hash_update(&ctx, ei->str_hash_type, &type, sizeof(type)); - bch_str_hash_update(&ctx, ei->str_hash_type, name->name, name->len); + bch_str_hash_update(&ctx, info->type, &key->type, sizeof(key->type)); + bch_str_hash_update(&ctx, info->type, key->name.name, key->name.len); - return POS(ei->vfs_inode.i_ino, - bch_str_hash_end(&ctx, ei->str_hash_type)); + return bch_str_hash_end(&ctx, info->type); } #define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len) -static int xattr_cmp(const struct bch_xattr *xattr, - u8 type, const struct qstr *q) +static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key) +{ + return bch_xattr_hash(info, key); +} + +static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) { - return xattr->x_type != type || - xattr->x_name_len != q->len || - memcmp(xattr->x_name, q->name, q->len); + struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k); + + return bch_xattr_hash(info, + &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len)); +} + +static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r) +{ + struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); + const struct xattr_search_key *r = _r; + + return l.v->x_type != r->type || + l.v->x_name_len != r->name.len || + memcmp(l.v->x_name, r->name.name, r->name.len); } +static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) +{ + struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); + struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r); + + return l.v->x_type != r.v->x_type || + l.v->x_name_len != r.v->x_name_len || + memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len); +} + +static const struct bch_hash_desc xattr_hash_desc = { + .btree_id = BTREE_ID_XATTRS, + .key_type = BCH_XATTR, + .whiteout_type = BCH_XATTR_WHITEOUT, + .hash_key = xattr_hash_key, + .hash_bkey = xattr_hash_bkey, + .cmp_key = xattr_cmp_key, + .cmp_bkey = xattr_cmp_bkey, +}; + static const char *bch_xattr_invalid(const struct cache_set *c, struct bkey_s_c k) { @@ -107,41 +147,26 @@ int bch_xattr_get(struct inode *inode, const char *name, { struct cache_set *c = inode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(inode); - struct qstr qname = (struct qstr) QSTR_INIT(name, strlen(name)); struct btree_iter iter; struct bkey_s_c k; - const struct bch_xattr *xattr; - int ret = -ENODATA; - - for_each_btree_key_with_holes(&iter, c, BTREE_ID_XATTRS, - bch_xattr_pos(ei, &qname, type), k) { - switch (k.k->type) { - case BCH_XATTR: - xattr = bkey_s_c_to_xattr(k).v; - - /* collision? */ - if (!xattr_cmp(xattr, type, &qname)) { - unsigned len = le16_to_cpu(xattr->x_val_len); - - ret = len; - if (buffer) { - if (len > size) - ret = -ERANGE; - else - memcpy(buffer, xattr_val(xattr), - len); - } - goto out; - } - break; - case BCH_XATTR_WHITEOUT: - break; - default: - /* hole, not found */ - goto out; - } + struct bkey_s_c_xattr xattr; + int ret; + + k = bch_hash_lookup(xattr_hash_desc, &ei->str_hash, c, + ei->vfs_inode.i_ino, &iter, + &X_SEARCH(type, name, strlen(name))); + if (IS_ERR(k.k)) + return bch_btree_iter_unlock(&iter) ?: -ENODATA; + + xattr = bkey_s_c_to_xattr(k); + ret = le16_to_cpu(xattr.v->x_val_len); + if (buffer) { + if (ret > size) + ret = -ERANGE; + else + memcpy(buffer, xattr_val(xattr.v), ret); } -out: + bch_btree_iter_unlock(&iter); return ret; } @@ -152,100 +177,46 @@ int bch_xattr_set(struct inode *inode, const char *name, { struct bch_inode_info *ei = to_bch_ei(inode); struct cache_set *c = inode->i_sb->s_fs_info; - struct btree_iter iter; - struct bkey_s_c k; - struct qstr qname = (struct qstr) QSTR_INIT((char *) name, - strlen(name)); - int ret = -EINVAL; - unsigned insert_flags = BTREE_INSERT_ATOMIC; - - if (!value) - insert_flags |= BTREE_INSERT_NOFAIL; - - bch_btree_iter_init_intent(&iter, c, BTREE_ID_XATTRS, - bch_xattr_pos(ei, &qname, type)); - - while ((k = bch_btree_iter_peek_with_holes(&iter)).k) { - switch (k.k->type) { - case BCH_XATTR: - /* collision? */ - if (xattr_cmp(bkey_s_c_to_xattr(k).v, type, &qname)) { - bch_btree_iter_advance_pos(&iter); - continue; - } - - if (flags & XATTR_CREATE) { - ret = -EEXIST; - goto out; - } - - break; - case BCH_XATTR_WHITEOUT: - bch_btree_iter_advance_pos(&iter); - continue; - default: - /* hole, not found */ - if (flags & XATTR_REPLACE) { - ret = -ENODATA; - goto out; - } - break; - } - - if (value) { - struct keylist keys; - struct bkey_i_xattr *xattr; - unsigned u64s = BKEY_U64s + - DIV_ROUND_UP(sizeof(struct bch_xattr) + - qname.len + size, - sizeof(u64)); - - if (u64s > U8_MAX) { - ret = -ERANGE; - break; - } + struct xattr_search_key search = X_SEARCH(type, name, strlen(name)); + int ret; - bch_keylist_init(&keys, NULL, 0); - - if (bch_keylist_realloc(&keys, u64s)) { - ret = -ENOMEM; - break; - } + if (!value) { + ret = bch_hash_delete(xattr_hash_desc, &ei->str_hash, + c, ei->vfs_inode.i_ino, + &ei->journal_seq, &search); + } else { + struct bkey_i_xattr *xattr; + unsigned u64s = BKEY_U64s + + DIV_ROUND_UP(sizeof(struct bch_xattr) + + search.name.len + size, + sizeof(u64)); + + if (u64s > U8_MAX) + return -ERANGE; + + xattr = kmalloc(u64s * sizeof(u64), GFP_NOFS); + if (!xattr) + return -ENOMEM; + + bkey_xattr_init(&xattr->k_i); + xattr->k.u64s = u64s; + xattr->v.x_type = type; + xattr->v.x_name_len = search.name.len; + xattr->v.x_val_len = cpu_to_le16(size); + memcpy(xattr->v.x_name, search.name.name, search.name.len); + memcpy(xattr_val(&xattr->v), value, size); + + ret = bch_hash_set(xattr_hash_desc, &ei->str_hash, c, + ei->vfs_inode.i_ino, &ei->journal_seq, + &xattr->k_i, + (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)| + (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0)); + kfree(xattr); + } - xattr = bkey_xattr_init(keys.top); - xattr->k.u64s = u64s; - xattr->k.p = k.k->p; - xattr->v.x_type = type; - xattr->v.x_name_len = qname.len; - xattr->v.x_val_len = cpu_to_le16(size); - memcpy(xattr->v.x_name, qname.name, qname.len); - memcpy(xattr_val(&xattr->v), value, size); - - BUG_ON(xattr_cmp(&xattr->v, type, &qname)); - - bch_keylist_enqueue(&keys); - - ret = bch_btree_insert_at(&iter, &xattr->k_i, NULL, NULL, - &ei->journal_seq, - insert_flags); - bch_keylist_free(&keys); - } else { - struct bkey_i whiteout; - /* removing */ - bkey_init(&whiteout.k); - whiteout.k.type = BCH_XATTR_WHITEOUT; - whiteout.k.p = k.k->p; - - ret = bch_btree_insert_at(&iter, &whiteout, - NULL, NULL, &ei->journal_seq, - insert_flags); - } + if (ret == -ENOENT) + ret = flags & XATTR_REPLACE ? -ENODATA : 0; - if (ret != -EINTR) - break; - } -out: - bch_btree_iter_unlock(&iter); return ret; } |