-rw-r--r-- | drivers/md/bcache/alloc.c | 1
-rw-r--r-- | drivers/md/bcache/alloc.h | 2
-rw-r--r-- | drivers/md/bcache/alloc_types.h | 72
-rw-r--r-- | drivers/md/bcache/bcache.h | 490
-rw-r--r-- | drivers/md/bcache/btree.c | 1
-rw-r--r-- | drivers/md/bcache/btree.h | 18
-rw-r--r-- | drivers/md/bcache/buckets.h | 85
-rw-r--r-- | drivers/md/bcache/buckets_types.h | 49
-rw-r--r-- | drivers/md/bcache/debug.c | 2
-rw-r--r-- | drivers/md/bcache/extents.c | 1
-rw-r--r-- | drivers/md/bcache/io.c | 2
-rw-r--r-- | drivers/md/bcache/io.h | 2
-rw-r--r-- | drivers/md/bcache/journal.c | 2
-rw-r--r-- | drivers/md/bcache/journal.h | 2
-rw-r--r-- | drivers/md/bcache/journal_types.h | 85
-rw-r--r-- | drivers/md/bcache/keybuf.h | 5
-rw-r--r-- | drivers/md/bcache/keybuf_types.h | 30
-rw-r--r-- | drivers/md/bcache/move.c | 1
-rw-r--r-- | drivers/md/bcache/super.c | 9
-rw-r--r-- | drivers/md/bcache/super.h | 174
-rw-r--r-- | drivers/md/bcache/sysfs.c | 2
-rw-r--r-- | drivers/md/bcache/trace.c | 1
22 files changed, 544 insertions(+), 492 deletions(-)
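The pattern applied throughout the patch below is to split struct definitions out of bcache.h into standalone *_types.h headers (alloc_types.h, buckets_types.h, journal_types.h, keybuf_types.h) and to move the related inline helpers into super.h, buckets.h and btree.h, so that a header which only embeds one of these types no longer has to pull in all of bcache.h. The following sketch only illustrates that layering; the widget names are invented for the example and do not appear in bcache:

/*
 * widget_types.h: plain struct definitions only, no function
 * declarations, so it can be included from anywhere without creating
 * an include cycle.
 */
#ifndef _WIDGET_TYPES_H
#define _WIDGET_TYPES_H

struct widget {
	unsigned long	id;
	unsigned	refcount;
};

#endif /* _WIDGET_TYPES_H */

/*
 * widget.h: inline helpers and declarations that may need other driver
 * headers; includes the types header rather than redefining anything.
 */
#ifndef _WIDGET_H
#define _WIDGET_H

#include "widget_types.h"

static inline void widget_get(struct widget *w)
{
	w->refcount++;
}

#endif /* _WIDGET_H */

/*
 * A header that merely embeds a struct widget includes widget_types.h
 * alone; only code that calls the helpers includes widget.h. That is
 * the same separation the patch makes between e.g. journal_types.h and
 * journal.h, or buckets_types.h and buckets.h.
 */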
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 9703523a338f..862c516e6d5f 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -58,6 +58,7 @@ #include "btree.h" #include "buckets.h" #include "extents.h" +#include "super.h" #include <linux/blkdev.h> #include <linux/kthread.h> diff --git a/drivers/md/bcache/alloc.h b/drivers/md/bcache/alloc.h index bddd80d2a6c1..9934f1558029 100644 --- a/drivers/md/bcache/alloc.h +++ b/drivers/md/bcache/alloc.h @@ -1,7 +1,7 @@ #ifndef _BCACHE_ALLOC_H #define _BCACHE_ALLOC_H -#include "bcache.h" +#include "alloc_types.h" struct bkey; struct bucket; diff --git a/drivers/md/bcache/alloc_types.h b/drivers/md/bcache/alloc_types.h new file mode 100644 index 000000000000..5741c58f6d98 --- /dev/null +++ b/drivers/md/bcache/alloc_types.h @@ -0,0 +1,72 @@ +#ifndef _BCACHE_ALLOC_TYPES_H +#define _BCACHE_ALLOC_TYPES_H + +/* There is one reserve for each type of btree, one for prios and gens + * and one for moving GC */ +enum alloc_reserve { + RESERVE_PRIO = BTREE_ID_NR, + /* + * free_inc.size buckets are set aside for moving GC btree node + * allocations. This means that if moving GC runs out of new buckets for + * btree nodes, it will have put back at least free_inc.size buckets + * back on free_inc, preventing a deadlock. + * + * XXX: figure out a less stupid way of achieving this + */ + RESERVE_MOVINGGC_BTREE, + /* + * Tiering needs a btree node reserve because of how + * btree_check_reserve() works -- if the cache tier is full, we don't + * want tiering to block forever. + */ + RESERVE_TIERING_BTREE, + RESERVE_METADATA_LAST = RESERVE_TIERING_BTREE, + RESERVE_MOVINGGC, + RESERVE_NONE, + RESERVE_NR, +}; + +/* + * The btree node reserve needs to contain enough buckets so that in a tree of + * depth 2, we can split each level of node, and then allocate a new root. + * See btree_check_reserve(). 
+ */ +#define BTREE_NODE_RESERVE 7 + +/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */ +#define OPEN_BUCKETS_COUNT 256 + +#define WRITE_POINT_COUNT 16 + +struct open_bucket { + struct list_head list; + spinlock_t lock; + atomic_t pin; + unsigned sectors_free; + BKEY_PADDED(key); +}; + +struct write_point { + struct open_bucket *b; + + /* + * If not NULL, refill from that device (this write point is a member of + * that struct cache) + * + * If NULL, do a normal replicated bucket allocation + */ + struct cache *ca; + + /* + * If not NULL, tier specific writepoint used by tiering/promotion - + * always allocates a single replica + */ + struct cache_group *tier; + + /* + * Otherwise do a normal replicated bucket allocation that could come + * from any tier (foreground write) + */ +}; + +#endif /* _BCACHE_ALLOC_TYPES_H */ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 8b27e2471d0c..4825a5f8246c 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -179,7 +179,6 @@ #include <linux/bcache.h> #include <linux/bio.h> -#include <linux/crc32c.h> #include <linux/kobject.h> #include <linux/list.h> #include <linux/mutex.h> @@ -193,7 +192,6 @@ #include <linux/workqueue.h> #include "bset.h" -#include "extents.h" #include "util.h" #include "closure.h" @@ -208,148 +206,13 @@ #define CACHE_RESERVE_PERCENT 20 -struct bucket_mark { - union { - struct { - u32 counter; - }; - - struct { - unsigned owned_by_allocator:1; - unsigned cached_sectors:15; - unsigned is_metadata:1; - unsigned dirty_sectors:15; - }; - }; -}; - -struct bucket { - union { - struct { - u16 read_prio; - u16 write_prio; - }; - u16 prio[2]; - }; - struct bucket_mark mark; - u8 last_gc; /* Most out of date gen in the btree */ - - /* generation copygc is going to move this bucket into */ - u8 copygc_gen; -}; - +#include "alloc_types.h" +#include "buckets_types.h" +#include "journal_types.h" +#include "keybuf_types.h" #include "stats_types.h" -#include "inode.h" -struct search; -struct btree; -struct keybuf; - -/* - * We put two of these in struct journal; we used them for writes to the - * journal that are being staged or in flight. - */ -struct journal_write { - struct jset *data; -#define JSET_BITS 5 - - struct cache_set *c; - struct closure_waitlist wait; -}; - -/* Embedded in struct cache_set */ -struct journal { - unsigned long flags; -#define JOURNAL_NEED_WRITE 0 -#define JOURNAL_DIRTY 1 -#define JOURNAL_REPLAY_DONE 2 - atomic_t in_flight; - - spinlock_t lock; - - unsigned u64s_remaining; - unsigned res_count; - - /* Number of blocks free in the bucket(s) we're currently writing to */ - unsigned blocks_free; - - /* used when waiting because the journal was full */ - wait_queue_head_t wait; - struct closure io; - struct delayed_work work; - - unsigned delay_ms; - - u64 seq; - DECLARE_FIFO(atomic_t, pin); - - BKEY_PADDED(key); - - struct journal_write w[2], *cur; -}; - -/* - * Embedded in struct cache. First three fields refer to the array of journal - * buckets, in cache_sb. - */ -struct journal_device { - /* - * For each journal bucket, contains the max sequence number of the - * journal writes it contains - so we know when a bucket can be reused. 
- */ - u64 *seq; - - /* Journal bucket we're currently writing to */ - unsigned cur_idx; - - /* Last journal bucket that still contains an open journal entry */ - unsigned last_idx; - - /* Next journal bucket to be discarded */ - unsigned discard_idx; - -#define DISCARD_READY 0 -#define DISCARD_IN_FLIGHT 1 -#define DISCARD_DONE 2 - /* 1 - discard in flight, -1 - discard completed */ - atomic_t discard_in_flight; - - struct work_struct discard_work; - struct bio discard_bio; - struct bio_vec discard_bv; - /* Bio for journal reads/writes to this device */ - struct bio bio; - struct bio_vec bv[1 << JSET_BITS]; - - /* for bch_journal_read_device */ - struct closure read; -}; - -struct keybuf_key { - struct rb_node node; - BKEY_PADDED(key); - atomic_t ref; -}; - -struct keybuf { - struct bkey last_scanned; - spinlock_t lock; - - /* - * Beginning and end of range in rb tree - so that we can skip taking - * lock and checking the rb tree when we need to check for overlapping - * keys. - */ - struct bkey start; - struct bkey end; - - struct rb_root keys; - - struct semaphore in_flight; - -#define KEYBUF_NR 500 - DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); -}; +struct btree; struct bcache_device { struct closure cl; @@ -463,101 +326,6 @@ struct cached_dev { unsigned char writeback_percent; }; -/* There is one reserve for each type of btree, one for prios and gens - * and one for moving GC */ -enum alloc_reserve { - RESERVE_PRIO = BTREE_ID_NR, - /* - * free_inc.size buckets are set aside for moving GC btree node - * allocations. This means that if moving GC runs out of new buckets for - * btree nodes, it will have put back at least free_inc.size buckets - * back on free_inc, preventing a deadlock. - * - * XXX: figure out a less stupid way of achieving this - */ - RESERVE_MOVINGGC_BTREE, - /* - * Tiering needs a btree node reserve because of how - * btree_check_reserve() works -- if the cache tier is full, we don't - * want tiering to block forever. - */ - RESERVE_TIERING_BTREE, - RESERVE_METADATA_LAST = RESERVE_TIERING_BTREE, - RESERVE_MOVINGGC, - RESERVE_NONE, - RESERVE_NR, -}; - -/* - * The btree node reserve needs to contain enough buckets so that in a tree of - * depth 2, we can split each level of node, and then allocate a new root. - * See btree_check_reserve(). 
- */ -#define BTREE_NODE_RESERVE 7 - -/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */ -#define OPEN_BUCKETS_COUNT 256 - -#define WRITE_POINT_COUNT 16 - -struct open_bucket { - struct list_head list; - spinlock_t lock; - atomic_t pin; - unsigned sectors_free; - BKEY_PADDED(key); -}; - -struct write_point { - struct open_bucket *b; - - /* - * If not NULL, refill from that device (this write point is a member of - * that struct cache) - * - * If NULL, do a normal replicated bucket allocation - */ - struct cache *ca; - - /* - * If not NULL, tier specific writepoint used by tiering/promotion - - * always allocates a single replica - */ - struct cache_group *tier; - - /* - * Otherwise do a normal replicated bucket allocation that could come - * from any tier (foreground write) - */ -}; - -struct bucket_stats { - u64 buckets_dirty; - u64 buckets_cached; - u64 buckets_meta; - u64 buckets_alloc; - - u64 sectors_dirty; - u64 sectors_cached; -}; - -struct bucket_heap_entry { - struct bucket *g; - unsigned long val; -}; - -static inline bool bucket_min_cmp(struct bucket_heap_entry l, - struct bucket_heap_entry r) -{ - return l.val < r.val; -} - -static inline bool bucket_max_cmp(struct bucket_heap_entry l, - struct bucket_heap_entry r) -{ - return l.val > r.val; -} - #define CACHE_DEV_REMOVING 0 struct cache { @@ -657,19 +425,6 @@ struct cache { atomic_long_t sectors_written; }; -static inline void bucket_heap_push(struct cache *ca, struct bucket *g, - unsigned long val) -{ - struct bucket_heap_entry new = { g, val }; - - if (!heap_full(&ca->heap)) - heap_add(&ca->heap, new, bucket_min_cmp); - else if (bucket_min_cmp(new, heap_peek(&ca->heap))) { - ca->heap.data[0] = new; - heap_sift(&ca->heap, 0, bucket_min_cmp); - } -} - struct gc_stat { size_t nodes; size_t key_bytes; @@ -934,13 +689,6 @@ struct bbio { #define to_bbio(_bio) container_of((_bio), struct bbio, bio) -#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE) -#define btree_blocks(b) \ - ((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits)) - -#define btree_default_blocks(c) \ - ((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) - #define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS) #define bucket_bytes(c) ((c)->sb.bucket_size << 9) #define block_bytes(c) ((c)->sb.block_size << 9) @@ -951,89 +699,6 @@ struct bbio { #define prio_buckets(c) \ DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) -static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) -{ - return s >> c->bucket_bits; -} - -static inline sector_t bucket_to_sector(struct cache_set *c, size_t b) -{ - return ((sector_t) b) << c->bucket_bits; -} - -static inline sector_t bucket_remainder(struct cache_set *c, sector_t s) -{ - return s & (c->sb.bucket_size - 1); -} - -static inline struct cache_member *cache_member_info(struct cache *ca) -{ - return ca->set->members + ca->sb.nr_this_dev; -} - -static inline struct cache *PTR_CACHE(struct cache_set *c, - const struct bkey *k, - unsigned ptr) -{ - unsigned dev = PTR_DEV(k, ptr); - - return dev < MAX_CACHES_PER_SET - ? 
rcu_dereference(c->cache[dev]) - : NULL; -} - -static inline size_t PTR_BUCKET_NR(struct cache_set *c, - const struct bkey *k, - unsigned ptr) -{ - return sector_to_bucket(c, PTR_OFFSET(k, ptr)); -} - -static inline u8 PTR_BUCKET_GEN(struct cache_set *c, - struct cache *ca, - const struct bkey *k, - unsigned ptr) -{ - return ca->bucket_gens[PTR_BUCKET_NR(c, k, ptr)]; -} - -static inline struct bucket *PTR_BUCKET(struct cache_set *c, - struct cache *ca, - const struct bkey *k, - unsigned ptr) -{ - return ca->buckets + PTR_BUCKET_NR(c, k, ptr); -} - -static inline uint8_t gen_after(uint8_t a, uint8_t b) -{ - uint8_t r = a - b; - return r > 128U ? 0 : r; -} - -static inline u8 ptr_stale(struct cache_set *c, struct cache *ca, - const struct bkey *k, unsigned ptr) -{ - return gen_after(PTR_BUCKET_GEN(c, ca, k, ptr), PTR_GEN(k, ptr)); -} - -/* checksumming */ - -u64 bch_checksum_update(unsigned, u64, const void *, size_t); -u64 bch_checksum(unsigned, const void *, size_t); - -/* - * This is used for various on disk data structures - cache_sb, prio_set, bset, - * jset: The checksum is _always_ the first 8 bytes of these structs - */ -#define csum_set(i, type) \ -({ \ - void *start = ((void *) (i)) + sizeof(u64); \ - void *end = bset_bkey_last(i); \ - \ - bch_checksum(type, start, end - start); \ -}) - /* Error handling macros */ #define btree_bug(b, ...) \ @@ -1066,153 +731,8 @@ do { \ bch_cache_set_error(c, __VA_ARGS__); \ } while (0) -/* Looping macros */ - -static inline struct cache *bch_next_cache_rcu(struct cache_set *c, - unsigned *iter) -{ - struct cache *ret = NULL; - - while (*iter < c->sb.nr_in_set && - !(ret = rcu_dereference(c->cache[*iter]))) - (*iter)++; - - return ret; -} - -#define for_each_cache_rcu(ca, c, iter) \ - for ((iter) = 0; ((ca) = bch_next_cache_rcu((c), &(iter))); (iter)++) - -static inline struct cache *bch_get_next_cache(struct cache_set *c, - unsigned *iter) -{ - struct cache *ret; - - rcu_read_lock(); - if ((ret = bch_next_cache_rcu(c, iter))) - percpu_ref_get(&ret->ref); - rcu_read_unlock(); - - return ret; -} - -/* - * If you break early, you must drop your ref on the current cache - */ -#define for_each_cache(ca, c, iter) \ - for ((iter) = 0; \ - (ca = bch_get_next_cache(c, &(iter))); \ - percpu_ref_put(&ca->ref), (iter)++) - -#define for_each_bucket(b, ca) \ - for (b = (ca)->buckets + (ca)->sb.first_bucket; \ - b < (ca)->buckets + (ca)->sb.nbuckets; b++) - -static inline void cached_dev_put(struct cached_dev *dc) -{ - if (atomic_dec_and_test(&dc->count)) - schedule_work(&dc->detach); -} - -static inline bool cached_dev_get(struct cached_dev *dc) -{ - if (!atomic_inc_not_zero(&dc->count)) - return false; - - /* Paired with the mb in cached_dev_attach */ - smp_mb__after_atomic(); - return true; -} - -static inline u64 bcache_dev_inum(struct bcache_device *d) -{ - return KEY_INODE(&d->inode.i_inode.i_key); -} - -static inline struct bcache_device *bch_dev_find(struct cache_set *c, u64 inode) -{ - return radix_tree_lookup(&c->devices, inode); -} - -#define kobj_attribute_write(n, fn) \ - static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn) - -#define kobj_attribute_rw(n, show, store) \ - static struct kobj_attribute ksysfs_##n = \ - __ATTR(n, S_IWUSR|S_IRUSR, show, store) - -/* superblock */ - -void bch_check_mark_super_slowpath(struct cache_set *, struct bkey *, bool); - -static inline bool bch_check_super_marked(struct cache_set *c, - struct bkey *k, bool meta) -{ - unsigned ptr; - struct cache_member *mi; - - for (ptr = 0; ptr < 
bch_extent_ptrs(k); ptr++) { - mi = c->members + PTR_DEV(k, ptr); - - if (!(meta ? CACHE_HAS_METADATA : CACHE_HAS_DATA)(mi)) - return false; - } - - return true; -} - -static inline void bch_check_mark_super(struct cache_set *c, - struct bkey *k, bool meta) -{ - if (bch_check_super_marked(c, k, meta)) - return; - - bch_check_mark_super_slowpath(c, k, meta); -} - /* Forward declarations */ -__printf(2, 3) -bool bch_cache_set_error(struct cache_set *, const char *, ...); - -void bch_prio_write(struct cache *); -void bch_write_bdev_super(struct cached_dev *, struct closure *); - -extern struct workqueue_struct *bcache_io_wq; -extern struct mutex bch_register_lock; -extern struct list_head bch_cache_sets; - -extern struct kobj_type bch_cached_dev_ktype; -extern struct kobj_type bch_flash_dev_ktype; -extern struct kobj_type bch_cache_set_ktype; -extern struct kobj_type bch_cache_set_internal_ktype; -extern struct kobj_type bch_cache_ktype; - -void bch_cached_dev_release(struct kobject *); -void bch_flash_dev_release(struct kobject *); -void bch_cache_set_release(struct kobject *); -void bch_cache_release(struct kobject *); - -int bch_super_realloc(struct cache *, unsigned); -void bcache_write_super(struct cache_set *); - -int bch_flash_dev_create(struct cache_set *, u64); - -int bch_cached_dev_attach(struct cached_dev *, struct cache_set *); -void bch_cached_dev_detach(struct cached_dev *); -void bch_cached_dev_run(struct cached_dev *); -void bcache_device_stop(struct bcache_device *); - -void bch_cache_set_unregister(struct cache_set *); -void bch_cache_set_stop(struct cache_set *); - -void bch_cache_read_only(struct cache *); -const char *bch_cache_read_write(struct cache *); -void bch_cache_remove(struct cache *); -int bch_cache_add(struct cache_set *, const char *); - -void bch_btree_cache_free(struct cache_set *); -int bch_btree_cache_alloc(struct cache_set *); void bch_tiering_init_cache_set(struct cache_set *); int bch_tiering_thread_start(struct cache_set *c); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index d1bd7b4bfae6..2d22463bd977 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -29,6 +29,7 @@ #include "io.h" #include "journal.h" #include "movinggc.h" +#include "super.h" #include "writeback.h" #include <linux/slab.h> diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 4eb60cd5baff..4bc176b4d763 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -171,6 +171,21 @@ static inline void set_gc_sectors(struct cache_set *c) atomic64_set(&c->sectors_until_gc, c->capacity / 16); } +static inline size_t btree_bytes(struct cache_set *c) +{ + return c->btree_pages * PAGE_SIZE; +} + +static inline unsigned btree_blocks(struct btree *b) +{ + return KEY_SIZE(&b->key) >> b->c->block_bits; +} + +static inline unsigned btree_default_blocks(struct cache_set *c) +{ + return (PAGE_SECTORS * c->btree_pages) >> c->block_bits; +} + /* Looping macros */ #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ @@ -254,6 +269,9 @@ int bch_initial_gc(struct cache_set *, struct list_head *); void bch_mark_keybuf_keys(struct cache_set *, struct keybuf *); u8 __bch_btree_mark_key(struct cache_set *, int, struct bkey *); +void bch_btree_cache_free(struct cache_set *); +int bch_btree_cache_alloc(struct cache_set *); + /* Return values from @fn parameter to map_keys and map_nodes */ #define MAP_DONE 0 /* We're done */ #define MAP_CONTINUE 1 /* Continue and advance the iterator */ diff --git a/drivers/md/bcache/buckets.h 
b/drivers/md/bcache/buckets.h index 6d2ee701b7f7..65575b47ff82 100644 --- a/drivers/md/bcache/buckets.h +++ b/drivers/md/bcache/buckets.h @@ -7,7 +7,88 @@ #ifndef _BUCKETS_H #define _BUCKETS_H -#include "bcache.h" +#include "buckets_types.h" +#include "super.h" + +#define for_each_bucket(b, ca) \ + for (b = (ca)->buckets + (ca)->sb.first_bucket; \ + b < (ca)->buckets + (ca)->sb.nbuckets; b++) + +static inline struct cache *PTR_CACHE(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + unsigned dev = PTR_DEV(k, ptr); + + return dev < MAX_CACHES_PER_SET + ? rcu_dereference(c->cache[dev]) + : NULL; +} + +static inline size_t PTR_BUCKET_NR(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + return sector_to_bucket(c, PTR_OFFSET(k, ptr)); +} + +static inline u8 PTR_BUCKET_GEN(struct cache_set *c, + struct cache *ca, + const struct bkey *k, + unsigned ptr) +{ + return ca->bucket_gens[PTR_BUCKET_NR(c, k, ptr)]; +} + +static inline struct bucket *PTR_BUCKET(struct cache_set *c, + struct cache *ca, + const struct bkey *k, + unsigned ptr) +{ + return ca->buckets + PTR_BUCKET_NR(c, k, ptr); +} + +static inline u8 gen_after(u8 a, u8 b) +{ + u8 r = a - b; + + return r > 128U ? 0 : r; +} + +static inline u8 ptr_stale(struct cache_set *c, struct cache *ca, + const struct bkey *k, unsigned ptr) +{ + return gen_after(PTR_BUCKET_GEN(c, ca, k, ptr), PTR_GEN(k, ptr)); +} + +/* bucket heaps */ + +static inline bool bucket_min_cmp(struct bucket_heap_entry l, + struct bucket_heap_entry r) +{ + return l.val < r.val; +} + +static inline bool bucket_max_cmp(struct bucket_heap_entry l, + struct bucket_heap_entry r) +{ + return l.val > r.val; +} + +static inline void bucket_heap_push(struct cache *ca, struct bucket *g, + unsigned long val) +{ + struct bucket_heap_entry new = { g, val }; + + if (!heap_full(&ca->heap)) + heap_add(&ca->heap, new, bucket_min_cmp); + else if (bucket_min_cmp(new, heap_peek(&ca->heap))) { + ca->heap.data[0] = new; + heap_sift(&ca->heap, 0, bucket_min_cmp); + } +} + +/* bucket gc marks */ /* The dirty and cached sector counts saturate. 
If this occurs, * reference counting alone will not free the bucket, and a btree @@ -141,4 +222,4 @@ u8 bch_mark_data_bucket(struct cache_set *, struct cache *, struct bkey *, unsigned, int, bool, bool); void bch_unmark_open_bucket(struct cache *, struct bucket *); -#endif +#endif /* _BUCKETS_H */ diff --git a/drivers/md/bcache/buckets_types.h b/drivers/md/bcache/buckets_types.h new file mode 100644 index 000000000000..ea63ce843150 --- /dev/null +++ b/drivers/md/bcache/buckets_types.h @@ -0,0 +1,49 @@ +#ifndef _BUCKETS_TYPES_H +#define _BUCKETS_TYPES_H + +struct bucket_mark { + union { + struct { + u32 counter; + }; + + struct { + unsigned owned_by_allocator:1; + unsigned cached_sectors:15; + unsigned is_metadata:1; + unsigned dirty_sectors:15; + }; + }; +}; + +struct bucket { + union { + struct { + u16 read_prio; + u16 write_prio; + }; + u16 prio[2]; + }; + struct bucket_mark mark; + u8 last_gc; /* Most out of date gen in the btree */ + + /* generation copygc is going to move this bucket into */ + u8 copygc_gen; +}; + +struct bucket_stats { + u64 buckets_dirty; + u64 buckets_cached; + u64 buckets_meta; + u64 buckets_alloc; + + u64 sectors_dirty; + u64 sectors_cached; +}; + +struct bucket_heap_entry { + struct bucket *g; + unsigned long val; +}; + +#endif /* _BUCKETS_TYPES_H */ diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index eb914f58f31d..3cc0df7dda80 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -7,10 +7,12 @@ #include "bcache.h" #include "btree.h" +#include "buckets.h" #include "debug.h" #include "extents.h" #include "io.h" #include "keybuf.h" +#include "super.h" #include <linux/console.h> #include <linux/debugfs.h> diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 2dbd272218ad..34149496a2e5 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -10,6 +10,7 @@ #include "debug.h" #include "extents.h" #include "inode.h" +#include "super.h" #include "writeback.h" #include <trace/events/bcache.h> diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 6432508e0e99..4cc5daedb904 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -9,11 +9,13 @@ #include "alloc.h" #include "bset.h" #include "btree.h" +#include "buckets.h" #include "debug.h" #include "extents.h" #include "io.h" #include "keybuf.h" #include "stats.h" +#include "super.h" #include <linux/blkdev.h> diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h index d8b771876690..3daa7d498428 100644 --- a/drivers/md/bcache/io.h +++ b/drivers/md/bcache/io.h @@ -69,4 +69,6 @@ bool cache_promote(struct cache_set *, struct bbio *, struct bkey *, unsigned); void bch_read_race_work(struct work_struct *work); +extern struct workqueue_struct *bcache_io_wq; + #endif /* _BCACHE_IO_H */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 770b72755641..5cb0c0231d9f 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -5,11 +5,13 @@ */ #include "bcache.h" +#include "buckets.h" #include "btree.h" #include "debug.h" #include "extents.h" #include "io.h" #include "journal.h" +#include "super.h" #include <trace/events/bcache.h> diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index 3dabd16d87f1..091caed980bc 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h @@ -107,6 +107,8 @@ * nodes that are pinning the oldest journal entries first. 
*/ +#include "journal_types.h" + static inline struct jset_keys *jset_keys_next(struct jset_keys *j) { return (void *) (&j->d[j->keys]); diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h new file mode 100644 index 000000000000..a18f8bff5f85 --- /dev/null +++ b/drivers/md/bcache/journal_types.h @@ -0,0 +1,85 @@ +#ifndef _BCACHE_JOURNAL_TYPES_H +#define _BCACHE_JOURNAL_TYPES_H + +/* + * We put two of these in struct journal; we used them for writes to the + * journal that are being staged or in flight. + */ +struct journal_write { + struct jset *data; +#define JSET_BITS 5 + + struct cache_set *c; + struct closure_waitlist wait; +}; + +/* Embedded in struct cache_set */ +struct journal { + unsigned long flags; +#define JOURNAL_NEED_WRITE 0 +#define JOURNAL_DIRTY 1 +#define JOURNAL_REPLAY_DONE 2 + atomic_t in_flight; + + spinlock_t lock; + + unsigned u64s_remaining; + unsigned res_count; + + /* Number of blocks free in the bucket(s) we're currently writing to */ + unsigned blocks_free; + + /* used when waiting because the journal was full */ + wait_queue_head_t wait; + struct closure io; + struct delayed_work work; + + unsigned delay_ms; + + u64 seq; + DECLARE_FIFO(atomic_t, pin); + + BKEY_PADDED(key); + + struct journal_write w[2], *cur; +}; + +/* + * Embedded in struct cache. First three fields refer to the array of journal + * buckets, in cache_sb. + */ +struct journal_device { + /* + * For each journal bucket, contains the max sequence number of the + * journal writes it contains - so we know when a bucket can be reused. + */ + u64 *seq; + + /* Journal bucket we're currently writing to */ + unsigned cur_idx; + + /* Last journal bucket that still contains an open journal entry */ + unsigned last_idx; + + /* Next journal bucket to be discarded */ + unsigned discard_idx; + +#define DISCARD_READY 0 +#define DISCARD_IN_FLIGHT 1 +#define DISCARD_DONE 2 + /* 1 - discard in flight, -1 - discard completed */ + atomic_t discard_in_flight; + + struct work_struct discard_work; + struct bio discard_bio; + struct bio_vec discard_bv; + + /* Bio for journal reads/writes to this device */ + struct bio bio; + struct bio_vec bv[1 << JSET_BITS]; + + /* for bch_journal_read_device */ + struct closure read; +}; + +#endif /* _BCACHE_JOURNAL_TYPES_H */ diff --git a/drivers/md/bcache/keybuf.h b/drivers/md/bcache/keybuf.h index 17f3f18674a4..34ca18231b0e 100644 --- a/drivers/md/bcache/keybuf.h +++ b/drivers/md/bcache/keybuf.h @@ -1,10 +1,7 @@ #ifndef _BCACHE_KEYBUF_H #define _BCACHE_KEYBUF_H -struct bkey; -struct cache_set; -struct keybuf; -struct keybuf_key; +#include "keybuf_types.h" typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); diff --git a/drivers/md/bcache/keybuf_types.h b/drivers/md/bcache/keybuf_types.h new file mode 100644 index 000000000000..7c1293071cda --- /dev/null +++ b/drivers/md/bcache/keybuf_types.h @@ -0,0 +1,30 @@ +#ifndef _BCACHE_KEYBUF_TYPES_H +#define _BCACHE_KEYBUF_TYPES_H + +struct keybuf_key { + struct rb_node node; + BKEY_PADDED(key); + atomic_t ref; +}; + +struct keybuf { + struct bkey last_scanned; + spinlock_t lock; + + /* + * Beginning and end of range in rb tree - so that we can skip taking + * lock and checking the rb tree when we need to check for overlapping + * keys. 
+ */ + struct bkey start; + struct bkey end; + + struct rb_root keys; + + struct semaphore in_flight; + +#define KEYBUF_NR 500 + DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); +}; + +#endif /* _BCACHE_KEYBUF_TYPES_H */ diff --git a/drivers/md/bcache/move.c b/drivers/md/bcache/move.c index 23a0372fba69..8f24b2359d44 100644 --- a/drivers/md/bcache/move.c +++ b/drivers/md/bcache/move.c @@ -1,6 +1,7 @@ #include "bcache.h" #include "btree.h" +#include "buckets.h" #include "extents.h" #include "io.h" #include "keybuf.h" diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6ad3b54f5bfa..05250bbe792a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -10,6 +10,7 @@ #include "alloc.h" #include "btree.h" #include "debug.h" +#include "inode.h" #include "io.h" #include "journal.h" #include "movinggc.h" @@ -17,6 +18,7 @@ #include "writeback.h" #include <linux/blkdev.h> +#include <linux/crc32c.h> #include <linux/debugfs.h> #include <linux/genhd.h> #include <linux/idr.h> @@ -2441,6 +2443,13 @@ err: /* Global interfaces/init */ +#define kobj_attribute_write(n, fn) \ + static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn) + +#define kobj_attribute_rw(n, show, store) \ + static struct kobj_attribute ksysfs_##n = \ + __ATTR(n, S_IWUSR|S_IRUSR, show, store) + static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, const char *, size_t); diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h new file mode 100644 index 000000000000..455cf1bdb292 --- /dev/null +++ b/drivers/md/bcache/super.h @@ -0,0 +1,174 @@ +#ifndef _BCACHE_SUPER_H +#define _BCACHE_SUPER_H + +#include "extents.h" + +static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) +{ + return s >> c->bucket_bits; +} + +static inline sector_t bucket_to_sector(struct cache_set *c, size_t b) +{ + return ((sector_t) b) << c->bucket_bits; +} + +static inline sector_t bucket_remainder(struct cache_set *c, sector_t s) +{ + return s & (c->sb.bucket_size - 1); +} + +static inline struct cache_member *cache_member_info(struct cache *ca) +{ + return ca->set->members + ca->sb.nr_this_dev; +} + +static inline struct cache *bch_next_cache_rcu(struct cache_set *c, + unsigned *iter) +{ + struct cache *ret = NULL; + + while (*iter < c->sb.nr_in_set && + !(ret = rcu_dereference(c->cache[*iter]))) + (*iter)++; + + return ret; +} + +#define for_each_cache_rcu(ca, c, iter) \ + for ((iter) = 0; ((ca) = bch_next_cache_rcu((c), &(iter))); (iter)++) + +static inline struct cache *bch_get_next_cache(struct cache_set *c, + unsigned *iter) +{ + struct cache *ret; + + rcu_read_lock(); + if ((ret = bch_next_cache_rcu(c, iter))) + percpu_ref_get(&ret->ref); + rcu_read_unlock(); + + return ret; +} + +/* + * If you break early, you must drop your ref on the current cache + */ +#define for_each_cache(ca, c, iter) \ + for ((iter) = 0; \ + (ca = bch_get_next_cache(c, &(iter))); \ + percpu_ref_put(&ca->ref), (iter)++) + +static inline void cached_dev_put(struct cached_dev *dc) +{ + if (atomic_dec_and_test(&dc->count)) + schedule_work(&dc->detach); +} + +static inline bool cached_dev_get(struct cached_dev *dc) +{ + if (!atomic_inc_not_zero(&dc->count)) + return false; + + /* Paired with the mb in cached_dev_attach */ + smp_mb__after_atomic(); + return true; +} + +static inline u64 bcache_dev_inum(struct bcache_device *d) +{ + return KEY_INODE(&d->inode.i_inode.i_key); +} + +static inline struct bcache_device *bch_dev_find(struct cache_set *c, + u64 inode) +{ + return 
radix_tree_lookup(&c->devices, inode); +} + +__printf(2, 3) +bool bch_cache_set_error(struct cache_set *, const char *, ...); + +u64 bch_checksum_update(unsigned, u64, const void *, size_t); +u64 bch_checksum(unsigned, const void *, size_t); + +/* + * This is used for various on disk data structures - cache_sb, prio_set, bset, + * jset: The checksum is _always_ the first 8 bytes of these structs + */ +#define csum_set(i, type) \ +({ \ + void *start = ((void *) (i)) + sizeof(u64); \ + void *end = bset_bkey_last(i); \ + \ + bch_checksum(type, start, end - start); \ +}) + +void bch_prio_write(struct cache *); + +void bch_check_mark_super_slowpath(struct cache_set *, struct bkey *, bool); + +static inline bool bch_check_super_marked(struct cache_set *c, + struct bkey *k, bool meta) +{ + unsigned ptr; + struct cache_member *mi; + + for (ptr = 0; ptr < bch_extent_ptrs(k); ptr++) { + mi = c->members + PTR_DEV(k, ptr); + + if (!(meta ? CACHE_HAS_METADATA : CACHE_HAS_DATA)(mi)) + return false; + } + + return true; +} + +static inline void bch_check_mark_super(struct cache_set *c, + struct bkey *k, bool meta) +{ + if (bch_check_super_marked(c, k, meta)) + return; + + bch_check_mark_super_slowpath(c, k, meta); +} + +int bch_super_realloc(struct cache *, unsigned); +void bcache_write_super(struct cache_set *); + +void bch_write_bdev_super(struct cached_dev *, struct closure *); + +void bch_cached_dev_release(struct kobject *); +void bch_flash_dev_release(struct kobject *); +void bch_cache_set_release(struct kobject *); +void bch_cache_release(struct kobject *); + +void bch_cache_set_unregister(struct cache_set *); +void bch_cache_set_stop(struct cache_set *); + +const char *register_bcache_devices(char **, int, struct cache_set **); +const char *bch_run_cache_set(struct cache_set *); + +int bch_flash_dev_create(struct cache_set *, u64); + +int bch_cached_dev_attach(struct cached_dev *, struct cache_set *); +void bch_cached_dev_detach(struct cached_dev *); +void bch_cached_dev_run(struct cached_dev *); +void bcache_device_stop(struct bcache_device *); + +void bch_cache_read_only(struct cache *); +const char *bch_cache_read_write(struct cache *); +void bch_cache_remove(struct cache *); +int bch_cache_add(struct cache_set *, const char *); + +extern struct mutex bch_register_lock; +extern struct list_head bch_cache_sets; +extern struct idr bch_cache_set_minor; + +extern struct kobj_type bch_cached_dev_ktype; +extern struct kobj_type bch_flash_dev_ktype; +extern struct kobj_type bch_cache_set_ktype; +extern struct kobj_type bch_cache_set_internal_ktype; +extern struct kobj_type bch_cache_ktype; + +#endif /* _BCACHE_SUPER_H */ diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 83076355847d..2883f36ec6d6 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -9,8 +9,10 @@ #include "alloc.h" #include "sysfs.h" #include "btree.h" +#include "inode.h" #include "journal.h" #include "request.h" +#include "super.h" #include "writeback.h" #include <linux/blkdev.h> diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c index 10bdc00a7933..a29e87278414 100644 --- a/drivers/md/bcache/trace.c +++ b/drivers/md/bcache/trace.c @@ -1,5 +1,6 @@ #include "bcache.h" #include "btree.h" +#include "buckets.h" #include <linux/blktrace_api.h> |
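A note on one of the helpers relocated into buckets.h above: gen_after() compares 8-bit bucket generations using unsigned wraparound, so ptr_stale() keeps giving sensible answers after a bucket's generation counter rolls over past 255. The userspace sketch below is not part of the patch; it only demonstrates the arithmetic of the gen_after() moved by this change:

#include <stdint.h>
#include <stdio.h>

static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;	/* modulo-256 distance from b forward to a */

	/* a forward distance above 128 means a is really behind b: not after */
	return r > 128U ? 0 : r;
}

int main(void)
{
	/* bucket gen wrapped from 250 to 3; a pointer written at gen 250... */
	printf("%u\n", (unsigned) gen_after(3, 250));	/* 9: pointer is 9 gens stale */

	/* ...while a pointer "ahead" of the bucket gen is not considered stale */
	printf("%u\n", (unsigned) gen_after(250, 3));	/* 0 */
	return 0;
}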