diff options
| field | value | date |
|---|---|---|
| author | Kent Overstreet <kent.overstreet@gmail.com> | 2015-10-12 21:41:23 -0800 |
| committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-01-18 21:35:43 -0900 |
| commit | ffc121ef9c4f1f47e23c416921413b4f5ab58e9d (patch) | |
| tree | 4474783ddd99e8b349cdffd5bb128c8c738cbf32 | |
| parent | a110043e8efc692df120c41cc5bcdb60dbb975d7 (diff) | |
bcache: Persistent reservations
-rw-r--r-- | drivers/md/bcache/bcache.h | 4 | ||||
-rw-r--r-- | drivers/md/bcache/bkey_methods.h | 17 | ||||
-rw-r--r-- | drivers/md/bcache/btree_gc.c | 67 | ||||
-rw-r--r-- | drivers/md/bcache/btree_gc.h | 5 | ||||
-rw-r--r-- | drivers/md/bcache/btree_types.h | 12 | ||||
-rw-r--r-- | drivers/md/bcache/buckets.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/extents.c | 52 | ||||
-rw-r--r-- | drivers/md/bcache/fs.c | 18 | ||||
-rw-r--r-- | drivers/md/bcache/journal.c | 12 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 6 | ||||
-rw-r--r-- | include/uapi/linux/bcache.h | 14 |
12 files changed, 161 insertions, 51 deletions
```diff
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 82aa2172f048..342dd72948bd 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -590,8 +590,8 @@ struct cache_set {
 	struct cache_group cache_tiers[CACHE_TIERS];
 
 	u64 capacity; /* sectors */
-	atomic_long_t sectors_reserved;
-	atomic_long_t sectors_reserved_cache;
+	atomic64_t sectors_reserved;
+	atomic64_t sectors_reserved_cache;
 
 	struct mutex bucket_lock;
 
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index a21325b87208..a01de6d45be3 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -8,7 +8,22 @@ enum bkey_type {
 	BKEY_TYPE_BTREE,
 };
 
-#define btree_node_type(_b) ((_b)->level ? BKEY_TYPE_BTREE : (_b)->btree_id)
+/* Type of a key in btree @id at level @level: */
+static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
+{
+	return level ? BKEY_TYPE_BTREE : id;
+}
+
+static inline bool btree_type_has_ptrs(enum bkey_type type)
+{
+	switch (type) {
+	case BKEY_TYPE_BTREE:
+	case BKEY_TYPE_EXTENTS:
+		return true;
+	default:
+		return false;
+	}
+}
 
 struct cache_set;
 struct btree;
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c
index 5c9772986108..bf33efbc50a0 100644
--- a/drivers/md/bcache/btree_gc.c
+++ b/drivers/md/bcache/btree_gc.c
@@ -118,30 +118,62 @@ u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c, struct bkey_s_c k)
 	return max_stale;
 }
 
-u8 __bch_btree_mark_key(struct cache_set *c, int level, struct bkey_s_c k)
+/*
+ * For runtime mark and sweep:
+ */
+static u8 __bch_btree_mark_key(struct cache_set *c, enum bkey_type type,
+			       struct bkey_s_c k)
 {
-	if (bkey_extent_is_data(k.k)) {
-		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+	switch (type) {
+	case BKEY_TYPE_BTREE:
+	case BKEY_TYPE_EXTENTS:
+		if (bkey_extent_is_data(k.k)) {
+			struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+
+			bch_mark_pointers(c, e,
+					  type == BKEY_TYPE_BTREE
+					  ? CACHE_BTREE_NODE_SIZE(&c->sb)
+					  : e.k->size, false,
+					  type == BKEY_TYPE_BTREE,
+					  true, GC_POS_MIN);
+		}
 
-		bch_mark_pointers(c, e, level
-				  ? CACHE_BTREE_NODE_SIZE(&c->sb)
-				  : e.k->size, false, level != 0,
-				  true, GC_POS_MIN);
+		return bch_btree_key_recalc_oldest_gen(c, k);
+	default:
+		BUG();
 	}
-
-	return bch_btree_key_recalc_oldest_gen(c, k);
 }
 
 static u8 btree_mark_key(struct cache_set *c, struct btree *b,
 			 struct bkey_s_c k)
 {
-	return __bch_btree_mark_key(c, b->level, k);
+	return __bch_btree_mark_key(c, btree_node_type(b), k);
+}
+
+/*
+ * For initial cache set bringup:
+ */
+u8 __bch_btree_mark_key_initial(struct cache_set *c, enum bkey_type type,
+				struct bkey_s_c k)
+{
+
+	switch (type) {
+	case BKEY_TYPE_BTREE:
+	case BKEY_TYPE_EXTENTS:
+		if (k.k->type == BCH_RESERVATION)
+			atomic64_add(k.k->size, &c->sectors_reserved);
+
+		return __bch_btree_mark_key(c, type, k);
+	default:
+		BUG();
+	}
+
 }
 
-/* Only the extent btree has leafs whose keys point to data */
-static inline bool btree_node_has_ptrs(struct btree *b)
+static u8 btree_mark_key_initial(struct cache_set *c, struct btree *b,
+				 struct bkey_s_c k)
 {
-	return b->btree_id == BTREE_ID_EXTENTS || b->level > 0;
+	return __bch_btree_mark_key_initial(c, btree_node_type(b), k);
 }
 
 static bool btree_gc_mark_node(struct cache_set *c, struct btree *b)
@@ -218,7 +250,7 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id)
 
 	spin_lock(&c->btree_root_lock);
 	b = c->btree_roots[btree_id];
-	__bch_btree_mark_key(c, b->level + 1, bkey_i_to_s_c(&b->key));
+	__bch_btree_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key));
 	gc_pos_set(c, gc_pos_btree_root(b->btree_id));
 	spin_unlock(&c->btree_root_lock);
 
@@ -828,11 +860,12 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id)
 
 			for_each_btree_node_key_unpack(&b->keys, &tup,
 						       &node_iter)
-				btree_mark_key(c, b, bkey_tup_to_s_c(&tup));
+				btree_mark_key_initial(c, b,
+						       bkey_tup_to_s_c(&tup));
 		}
 
-		__bch_btree_mark_key(c, iter.level + 1,
-				     bkey_i_to_s_c(&b->key));
+		__bch_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
+					     bkey_i_to_s_c(&b->key));
 
 		bch_btree_iter_cond_resched(&iter);
 	}
diff --git a/drivers/md/bcache/btree_gc.h b/drivers/md/bcache/btree_gc.h
index 40b160e715fd..50db7b4613d8 100644
--- a/drivers/md/bcache/btree_gc.h
+++ b/drivers/md/bcache/btree_gc.h
@@ -3,12 +3,15 @@
 
 #include "btree_types.h"
 
+enum bkey_type;
+
 void bch_gc(struct cache_set *);
 void bch_gc_thread_stop(struct cache_set *);
 int bch_gc_thread_start(struct cache_set *);
 int bch_initial_gc(struct cache_set *, struct list_head *);
 u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c);
-u8 __bch_btree_mark_key(struct cache_set *, int, struct bkey_s_c);
+u8 __bch_btree_mark_key_initial(struct cache_set *, enum bkey_type,
+				struct bkey_s_c);
 
 /*
  * For concurrent mark and sweep (with other index updates), we define a total
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index 75e320e4466f..0385f7107a4d 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -7,6 +7,7 @@
 #include <linux/semaphore.h>
 #include <linux/workqueue.h>
 
+#include "bkey_methods.h"
 #include "bset.h"
 #include "journal_types.h"
 #include "six.h"
@@ -122,4 +123,15 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i)
 	return i - (void *) b->data;
 }
 
+/* Type of keys @b contains: */
+static inline enum bkey_type btree_node_type(struct btree *b)
+{
+	return b->level ? BKEY_TYPE_BTREE : b->btree_id;
+}
+
+static inline bool btree_node_has_ptrs(struct btree *b)
+{
+	return btree_type_has_ptrs(btree_node_type(b));
+}
+
 #endif /* _BCACHE_BTREE_TYPES_H */
diff --git a/drivers/md/bcache/buckets.h b/drivers/md/bcache/buckets.h
index c470adcd04b8..7ea15ec4f9c7 100644
--- a/drivers/md/bcache/buckets.h
+++ b/drivers/md/bcache/buckets.h
@@ -230,7 +230,7 @@ static inline u64 __cache_set_sectors_used(struct cache_set *c)
 	}
 	rcu_read_unlock();
 
-	return used + atomic_long_read(&c->sectors_reserved);
+	return used + atomic64_read(&c->sectors_reserved);
 }
 
 static inline u64 cache_set_sectors_used(struct cache_set *c)
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 9f0a42f1b3ee..64667b116331 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -192,7 +192,8 @@ void bch_verify_inode_refs(struct cache_set *c)
 
 	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
 			   POS(BCACHE_ROOT_INO, 0), k) {
-		if (k.k->type == KEY_TYPE_DISCARD)
+		if (k.k->type == KEY_TYPE_DISCARD ||
+		    k.k->type == BCH_RESERVATION)
 			continue;
 
 		if (k.k->p.inode != cur_inum &&
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f61388bb734d..005b50163a16 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -775,9 +775,12 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
 
 static int bch_add_sectors(struct cache_set *c, struct btree *b,
 			   struct bkey_s_c k, u64 offset,
-			   int sectors, bool fail_if_stale)
+			   s64 sectors, bool fail_if_stale)
 {
-	if (sectors && bkey_extent_is_data(k.k)) {
+	if (!sectors)
+		return 0;
+
+	if (bkey_extent_is_data(k.k)) {
 		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
 		int ret;
 
@@ -790,13 +793,15 @@ static int bch_add_sectors(struct cache_set *c, struct btree *b,
 		if (!bkey_extent_is_cached(e.k))
 			bcache_dev_sectors_dirty_add(c, e.k->p.inode,
 						     offset, sectors);
+	} else if (k.k->type == BCH_RESERVATION) {
+		atomic64_add_bug(sectors, &c->sectors_reserved);
 	}
 
 	return 0;
 }
 
 static void bch_subtract_sectors(struct cache_set *c, struct btree *b,
-				 struct bkey_s_c k, u64 offset, int sectors)
+				 struct bkey_s_c k, u64 offset, s64 sectors)
 {
 	bch_add_sectors(c, b, k, offset, -sectors, false);
 }
@@ -1362,15 +1367,17 @@ invalid:
 		return reason;
 	}
 
+	case BCH_RESERVATION:
+		return NULL;
+
 	default:
 		return "invalid value type";
 	}
 }
 
-static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
-				  struct bkey_s_c k)
+static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
+					 struct bkey_s_c_extent e)
 {
-	struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
 	const struct bch_extent_ptr *ptr;
 	struct cache_member_rcu *mi;
 	struct cache *ca;
@@ -1462,7 +1469,8 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
 
 	if (!bkey_extent_is_cached(e.k) &&
 	    replicas < CACHE_SET_DATA_REPLICAS_HAVE(&c->sb)) {
-		bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+		bch_bkey_val_to_text(c, btree_node_type(b), buf,
+				     sizeof(buf), e.s_c);
 		cache_set_bug(c,
 			"extent key bad (too few replicas, %u < %llu): %s",
 			replicas, CACHE_SET_DATA_REPLICAS_HAVE(&c->sb), buf);
@@ -1474,7 +1482,8 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
 	 */
 	for (i = 0; i < CACHE_TIERS; i++)
 		if (ptrs_per_tier[i] > CACHE_SET_DATA_REPLICAS_WANT(&c->sb)) {
-			bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+			bch_bkey_val_to_text(c, btree_node_type(b), buf,
+					     sizeof(buf), e.s_c);
 			cache_set_bug(c,
 				"extent key bad (too many tier %u replicas): %s",
 				i, buf);
@@ -1484,14 +1493,16 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
 	return;
 
 bad_device:
-	bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+	bch_bkey_val_to_text(c, btree_node_type(b), buf,
+			     sizeof(buf), e.s_c);
 	cache_set_bug(c, "extent pointer to dev %u missing device: %s",
 		      ptr->dev, buf);
 	cache_member_info_put();
 	return;
 
 bad_ptr:
-	bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+	bch_bkey_val_to_text(c, btree_node_type(b), buf,
+			     sizeof(buf), e.s_c);
 	cache_set_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i "
 		      "gen %i last_gc %i mark 0x%08x",
 		      buf, PTR_BUCKET_NR(ca, ptr),
@@ -1501,6 +1512,20 @@ bad_ptr:
 	return;
 }
 
+static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
+				  struct bkey_s_c k)
+{
+	switch (k.k->type) {
+	case BCH_EXTENT:
+	case BCH_EXTENT_CACHED:
+		bch_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k));
+	case BCH_RESERVATION:
+		break;
+	default:
+		BUG();
+	}
+}
+
 static void bch_extent_to_text(struct cache_set *c, char *buf,
 			       size_t size, struct bkey_s_c k)
 {
@@ -1656,6 +1681,8 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey_s k)
 		}
 
 		return false;
+	case BCH_RESERVATION:
+		return false;
 	default:
 		BUG();
 	}
@@ -1715,6 +1742,10 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k,
 		rcu_read_unlock();
 		return;
 
+	case BCH_RESERVATION:
+		ret->ca = NULL;
+		return;
+
 	default:
 		BUG();
 	}
@@ -1747,6 +1778,7 @@ static enum merge_result bch_extent_merge(struct btree_keys *bk,
 	case KEY_TYPE_DELETED:
 	case KEY_TYPE_DISCARD:
 	case KEY_TYPE_ERROR:
+	case BCH_RESERVATION:
 		/* These types are mergeable, and no val to check */
 		break;
 
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index 4a913660696a..770690f60b21 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -54,19 +54,21 @@ static int bch_read_single_page(struct page *, struct address_space *);
 
 static int reserve_sectors(struct cache_set *c, unsigned sectors)
 {
-	if (likely(atomic_long_sub_return(sectors,
-					  &c->sectors_reserved_cache) >= 0))
+	u64 sectors_to_get = SECTORS_CACHE + sectors;
+
+	if (likely(atomic64_sub_return(sectors,
+				       &c->sectors_reserved_cache) >= 0))
 		return 0;
 
-	atomic_long_add(SECTORS_CACHE, &c->sectors_reserved);
+	atomic64_add(sectors_to_get, &c->sectors_reserved);
 
 	if (likely(!cache_set_full(c))) {
-		atomic_long_add(SECTORS_CACHE, &c->sectors_reserved_cache);
+		atomic64_add(sectors_to_get, &c->sectors_reserved_cache);
 		return 0;
 	}
 
-	atomic_long_sub_bug(SECTORS_CACHE, &c->sectors_reserved);
-	atomic_long_add(sectors, &c->sectors_reserved_cache);
+	atomic64_sub_bug(sectors_to_get, &c->sectors_reserved);
+	atomic64_add(sectors, &c->sectors_reserved_cache);
 	return -ENOSPC;
 }
 
@@ -92,7 +94,7 @@ static void bch_clear_page_bits(struct cache_set *c, struct bch_inode_info *ei,
 	EBUG_ON(!PageLocked(page));
 
 	if (PageAllocated(page)) {
-		atomic_long_sub_bug(PAGE_SECTORS, &c->sectors_reserved);
+		atomic64_sub_bug(PAGE_SECTORS, &c->sectors_reserved);
 		ClearPageAllocated(page);
 	}
 
@@ -1744,7 +1746,7 @@ static void bch_writepage_io_done(struct closure *cl)
 	struct bio_vec *bvec;
 	unsigned i;
 
-	atomic_long_sub_bug(io->sectors_reserved, &c->sectors_reserved);
+	atomic64_sub_bug(io->sectors_reserved, &c->sectors_reserved);
 
 	for (i = 0; i < ARRAY_SIZE(io->i_size_update_count); i++)
 		i_size_update_put(c, ei, i, io->i_size_update_count[i]);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index a1225705affd..65142eb49069 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -756,12 +756,12 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
 
 	list_for_each_entry(r, list, list)
 		for_each_jset_key(k, n, j, &r->j) {
-			if ((j->level || j->btree_id == BTREE_ID_EXTENTS) &&
-			    !bkey_invalid(c, j->level
-					  ? BKEY_TYPE_BTREE : j->btree_id,
-					  bkey_i_to_s_c(k)))
-				__bch_btree_mark_key(c, j->level,
-						     bkey_i_to_s_c(k));
+			enum bkey_type type = bkey_type(j->level, j->btree_id);
+
+			if (btree_type_has_ptrs(type) &&
+			    !bkey_invalid(c, type, bkey_i_to_s_c(k)))
+				__bch_btree_mark_key_initial(c, type,
+							     bkey_i_to_s_c(k));
 		}
 }
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 809d5a01ba8a..5ce8590e39f5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -589,8 +589,10 @@ static ssize_t show_cache_set_alloc_debug(struct cache_set *c, char *buf)
 	return scnprintf(buf, PAGE_SIZE,
 			 "capacity:\t\t%llu\n"
 			 "meta sectors:\t\t%llu\n"
-			 "dirty sectors:\t\t%llu\n",
-			 c->capacity, meta, dirty);
+			 "dirty sectors:\t\t%llu\n"
+			 "reserved sectors:\t%lu\n",
+			 c->capacity, meta, dirty,
+			 atomic64_read(&c->sectors_reserved));
 }
 
 static ssize_t bch_compression_stats(struct cache_set *c, char *buf)
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 181686996c05..e6fec099efa3 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -364,17 +364,22 @@ struct bch_extent_crc64 {
 
 #define CRC64_EXTENT_SIZE_MAX (1U << 17)
 
+/*
+ * @reservation - pointer hasn't been written to, just reserved
+ */
 struct bch_extent_ptr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	__u64 type:2,
 		erasure_coded:1,
-		offset:45, /* 16 petabytes */
+		reservation:1,
+		offset:44, /* 8 petabytes */
 		dev:8,
 		gen:8;
 #elif defined (__BIG_ENDIAN_BITFIELD)
 	__u64 gen:8,
 		dev:8,
-		offset:45,
+		offset:44,
+		reservation:1,
 		erasure_coded:1,
 		type:2;
 #endif
@@ -396,6 +401,11 @@ enum {
 	 * have the same value type:
 	 */
 	BCH_EXTENT_CACHED = 129,
+
+	/*
+	 * Persistent reservation:
+	 */
+	BCH_RESERVATION = 130,
 };
 
 struct bch_extent {
```