-rw-r--r--  drivers/md/bcache/alloc.c          |   1
-rw-r--r--  drivers/md/bcache/alloc.h          |   2
-rw-r--r--  drivers/md/bcache/alloc_types.h    |  72
-rw-r--r--  drivers/md/bcache/bcache.h         | 490
-rw-r--r--  drivers/md/bcache/btree.c          |   1
-rw-r--r--  drivers/md/bcache/btree.h          |  18
-rw-r--r--  drivers/md/bcache/buckets.h        |  85
-rw-r--r--  drivers/md/bcache/buckets_types.h  |  49
-rw-r--r--  drivers/md/bcache/debug.c          |   2
-rw-r--r--  drivers/md/bcache/extents.c        |   1
-rw-r--r--  drivers/md/bcache/io.c             |   2
-rw-r--r--  drivers/md/bcache/io.h             |   2
-rw-r--r--  drivers/md/bcache/journal.c        |   2
-rw-r--r--  drivers/md/bcache/journal.h        |   2
-rw-r--r--  drivers/md/bcache/journal_types.h  |  85
-rw-r--r--  drivers/md/bcache/keybuf.h         |   5
-rw-r--r--  drivers/md/bcache/keybuf_types.h   |  30
-rw-r--r--  drivers/md/bcache/move.c           |   1
-rw-r--r--  drivers/md/bcache/super.c          |   9
-rw-r--r--  drivers/md/bcache/super.h          | 174
-rw-r--r--  drivers/md/bcache/sysfs.c          |   2
-rw-r--r--  drivers/md/bcache/trace.c          |   1
22 files changed, 544 insertions(+), 492 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 9703523a338f..862c516e6d5f 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -58,6 +58,7 @@
#include "btree.h"
#include "buckets.h"
#include "extents.h"
+#include "super.h"
#include <linux/blkdev.h>
#include <linux/kthread.h>
diff --git a/drivers/md/bcache/alloc.h b/drivers/md/bcache/alloc.h
index bddd80d2a6c1..9934f1558029 100644
--- a/drivers/md/bcache/alloc.h
+++ b/drivers/md/bcache/alloc.h
@@ -1,7 +1,7 @@
#ifndef _BCACHE_ALLOC_H
#define _BCACHE_ALLOC_H
-#include "bcache.h"
+#include "alloc_types.h"
struct bkey;
struct bucket;
diff --git a/drivers/md/bcache/alloc_types.h b/drivers/md/bcache/alloc_types.h
new file mode 100644
index 000000000000..5741c58f6d98
--- /dev/null
+++ b/drivers/md/bcache/alloc_types.h
@@ -0,0 +1,72 @@
+#ifndef _BCACHE_ALLOC_TYPES_H
+#define _BCACHE_ALLOC_TYPES_H
+
+/* There is one reserve for each type of btree, one for prios and gens
+ * and one for moving GC */
+enum alloc_reserve {
+ RESERVE_PRIO = BTREE_ID_NR,
+ /*
+ * free_inc.size buckets are set aside for moving GC btree node
+ * allocations. This means that if moving GC runs out of new buckets for
+ * btree nodes, it will have put back at least free_inc.size buckets
+ * back on free_inc, preventing a deadlock.
+ *
+ * XXX: figure out a less stupid way of achieving this
+ */
+ RESERVE_MOVINGGC_BTREE,
+ /*
+ * Tiering needs a btree node reserve because of how
+ * btree_check_reserve() works -- if the cache tier is full, we don't
+ * want tiering to block forever.
+ */
+ RESERVE_TIERING_BTREE,
+ RESERVE_METADATA_LAST = RESERVE_TIERING_BTREE,
+ RESERVE_MOVINGGC,
+ RESERVE_NONE,
+ RESERVE_NR,
+};
+
+/*
+ * The btree node reserve needs to contain enough buckets so that in a tree of
+ * depth 2, we can split each level of node, and then allocate a new root.
+ * See btree_check_reserve().
+ */
+#define BTREE_NODE_RESERVE 7
+
+/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
+#define OPEN_BUCKETS_COUNT 256
+
+#define WRITE_POINT_COUNT 16
+
+struct open_bucket {
+ struct list_head list;
+ spinlock_t lock;
+ atomic_t pin;
+ unsigned sectors_free;
+ BKEY_PADDED(key);
+};
+
+struct write_point {
+ struct open_bucket *b;
+
+ /*
+ * If not NULL, refill from that device (this write point is a member of
+ * that struct cache)
+ *
+ * If NULL, do a normal replicated bucket allocation
+ */
+ struct cache *ca;
+
+ /*
+ * If not NULL, tier specific writepoint used by tiering/promotion -
+ * always allocates a single replica
+ */
+ struct cache_group *tier;
+
+ /*
+ * Otherwise do a normal replicated bucket allocation that could come
+ * from any tier (foreground write)
+ */
+};
+
+#endif /* _BCACHE_ALLOC_TYPES_H */
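The write_point comments above describe three allocation policies, selected by which pointer is set. A minimal standalone sketch of that dispatch, purely illustrative - wp_policy and write_point_policy() are hypothetical names, not part of this patch:

	enum wp_policy { ALLOC_ONE_DEV, ALLOC_ONE_TIER, ALLOC_REPLICATED };

	static enum wp_policy write_point_policy(const struct write_point *wp)
	{
		if (wp->ca)
			return ALLOC_ONE_DEV;	/* member of a struct cache: refill from that device */
		if (wp->tier)
			return ALLOC_ONE_TIER;	/* tiering/promotion: a single replica from that tier */
		return ALLOC_REPLICATED;	/* foreground write: replicated, any tier */
	}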
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 8b27e2471d0c..4825a5f8246c 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -179,7 +179,6 @@
#include <linux/bcache.h>
#include <linux/bio.h>
-#include <linux/crc32c.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -193,7 +192,6 @@
#include <linux/workqueue.h>
#include "bset.h"
-#include "extents.h"
#include "util.h"
#include "closure.h"
@@ -208,148 +206,13 @@
#define CACHE_RESERVE_PERCENT 20
-struct bucket_mark {
- union {
- struct {
- u32 counter;
- };
-
- struct {
- unsigned owned_by_allocator:1;
- unsigned cached_sectors:15;
- unsigned is_metadata:1;
- unsigned dirty_sectors:15;
- };
- };
-};
-
-struct bucket {
- union {
- struct {
- u16 read_prio;
- u16 write_prio;
- };
- u16 prio[2];
- };
- struct bucket_mark mark;
- u8 last_gc; /* Most out of date gen in the btree */
-
- /* generation copygc is going to move this bucket into */
- u8 copygc_gen;
-};
-
+#include "alloc_types.h"
+#include "buckets_types.h"
+#include "journal_types.h"
+#include "keybuf_types.h"
#include "stats_types.h"
-#include "inode.h"
-struct search;
-struct btree;
-struct keybuf;
-
-/*
- * We put two of these in struct journal; we used them for writes to the
- * journal that are being staged or in flight.
- */
-struct journal_write {
- struct jset *data;
-#define JSET_BITS 5
-
- struct cache_set *c;
- struct closure_waitlist wait;
-};
-
-/* Embedded in struct cache_set */
-struct journal {
- unsigned long flags;
-#define JOURNAL_NEED_WRITE 0
-#define JOURNAL_DIRTY 1
-#define JOURNAL_REPLAY_DONE 2
- atomic_t in_flight;
-
- spinlock_t lock;
-
- unsigned u64s_remaining;
- unsigned res_count;
-
- /* Number of blocks free in the bucket(s) we're currently writing to */
- unsigned blocks_free;
-
- /* used when waiting because the journal was full */
- wait_queue_head_t wait;
- struct closure io;
- struct delayed_work work;
-
- unsigned delay_ms;
-
- u64 seq;
- DECLARE_FIFO(atomic_t, pin);
-
- BKEY_PADDED(key);
-
- struct journal_write w[2], *cur;
-};
-
-/*
- * Embedded in struct cache. First three fields refer to the array of journal
- * buckets, in cache_sb.
- */
-struct journal_device {
- /*
- * For each journal bucket, contains the max sequence number of the
- * journal writes it contains - so we know when a bucket can be reused.
- */
- u64 *seq;
-
- /* Journal bucket we're currently writing to */
- unsigned cur_idx;
-
- /* Last journal bucket that still contains an open journal entry */
- unsigned last_idx;
-
- /* Next journal bucket to be discarded */
- unsigned discard_idx;
-
-#define DISCARD_READY 0
-#define DISCARD_IN_FLIGHT 1
-#define DISCARD_DONE 2
- /* 1 - discard in flight, -1 - discard completed */
- atomic_t discard_in_flight;
-
- struct work_struct discard_work;
- struct bio discard_bio;
- struct bio_vec discard_bv;
- /* Bio for journal reads/writes to this device */
- struct bio bio;
- struct bio_vec bv[1 << JSET_BITS];
-
- /* for bch_journal_read_device */
- struct closure read;
-};
-
-struct keybuf_key {
- struct rb_node node;
- BKEY_PADDED(key);
- atomic_t ref;
-};
-
-struct keybuf {
- struct bkey last_scanned;
- spinlock_t lock;
-
- /*
- * Beginning and end of range in rb tree - so that we can skip taking
- * lock and checking the rb tree when we need to check for overlapping
- * keys.
- */
- struct bkey start;
- struct bkey end;
-
- struct rb_root keys;
-
- struct semaphore in_flight;
-
-#define KEYBUF_NR 500
- DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
-};
+struct btree;
struct bcache_device {
struct closure cl;
@@ -463,101 +326,6 @@ struct cached_dev {
unsigned char writeback_percent;
};
-/* There is one reserve for each type of btree, one for prios and gens
- * and one for moving GC */
-enum alloc_reserve {
- RESERVE_PRIO = BTREE_ID_NR,
- /*
- * free_inc.size buckets are set aside for moving GC btree node
- * allocations. This means that if moving GC runs out of new buckets for
- * btree nodes, it will have put back at least free_inc.size buckets
- * back on free_inc, preventing a deadlock.
- *
- * XXX: figure out a less stupid way of achieving this
- */
- RESERVE_MOVINGGC_BTREE,
- /*
- * Tiering needs a btree node reserve because of how
- * btree_check_reserve() works -- if the cache tier is full, we don't
- * want tiering to block forever.
- */
- RESERVE_TIERING_BTREE,
- RESERVE_METADATA_LAST = RESERVE_TIERING_BTREE,
- RESERVE_MOVINGGC,
- RESERVE_NONE,
- RESERVE_NR,
-};
-
-/*
- * The btree node reserve needs to contain enough buckets so that in a tree of
- * depth 2, we can split each level of node, and then allocate a new root.
- * See btree_check_reserve().
- */
-#define BTREE_NODE_RESERVE 7
-
-/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
-#define OPEN_BUCKETS_COUNT 256
-
-#define WRITE_POINT_COUNT 16
-
-struct open_bucket {
- struct list_head list;
- spinlock_t lock;
- atomic_t pin;
- unsigned sectors_free;
- BKEY_PADDED(key);
-};
-
-struct write_point {
- struct open_bucket *b;
-
- /*
- * If not NULL, refill from that device (this write point is a member of
- * that struct cache)
- *
- * If NULL, do a normal replicated bucket allocation
- */
- struct cache *ca;
-
- /*
- * If not NULL, tier specific writepoint used by tiering/promotion -
- * always allocates a single replica
- */
- struct cache_group *tier;
-
- /*
- * Otherwise do a normal replicated bucket allocation that could come
- * from any tier (foreground write)
- */
-};
-
-struct bucket_stats {
- u64 buckets_dirty;
- u64 buckets_cached;
- u64 buckets_meta;
- u64 buckets_alloc;
-
- u64 sectors_dirty;
- u64 sectors_cached;
-};
-
-struct bucket_heap_entry {
- struct bucket *g;
- unsigned long val;
-};
-
-static inline bool bucket_min_cmp(struct bucket_heap_entry l,
- struct bucket_heap_entry r)
-{
- return l.val < r.val;
-}
-
-static inline bool bucket_max_cmp(struct bucket_heap_entry l,
- struct bucket_heap_entry r)
-{
- return l.val > r.val;
-}
-
#define CACHE_DEV_REMOVING 0
struct cache {
@@ -657,19 +425,6 @@ struct cache {
atomic_long_t sectors_written;
};
-static inline void bucket_heap_push(struct cache *ca, struct bucket *g,
- unsigned long val)
-{
- struct bucket_heap_entry new = { g, val };
-
- if (!heap_full(&ca->heap))
- heap_add(&ca->heap, new, bucket_min_cmp);
- else if (bucket_min_cmp(new, heap_peek(&ca->heap))) {
- ca->heap.data[0] = new;
- heap_sift(&ca->heap, 0, bucket_min_cmp);
- }
-}
-
struct gc_stat {
size_t nodes;
size_t key_bytes;
@@ -934,13 +689,6 @@ struct bbio {
#define to_bbio(_bio) container_of((_bio), struct bbio, bio)
-#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE)
-#define btree_blocks(b) \
- ((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
-
-#define btree_default_blocks(c) \
- ((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
-
#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
#define block_bytes(c) ((c)->sb.block_size << 9)
@@ -951,89 +699,6 @@ struct bbio {
#define prio_buckets(c) \
DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))
-static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
-{
- return s >> c->bucket_bits;
-}
-
-static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
-{
- return ((sector_t) b) << c->bucket_bits;
-}
-
-static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
-{
- return s & (c->sb.bucket_size - 1);
-}
-
-static inline struct cache_member *cache_member_info(struct cache *ca)
-{
- return ca->set->members + ca->sb.nr_this_dev;
-}
-
-static inline struct cache *PTR_CACHE(struct cache_set *c,
- const struct bkey *k,
- unsigned ptr)
-{
- unsigned dev = PTR_DEV(k, ptr);
-
- return dev < MAX_CACHES_PER_SET
- ? rcu_dereference(c->cache[dev])
- : NULL;
-}
-
-static inline size_t PTR_BUCKET_NR(struct cache_set *c,
- const struct bkey *k,
- unsigned ptr)
-{
- return sector_to_bucket(c, PTR_OFFSET(k, ptr));
-}
-
-static inline u8 PTR_BUCKET_GEN(struct cache_set *c,
- struct cache *ca,
- const struct bkey *k,
- unsigned ptr)
-{
- return ca->bucket_gens[PTR_BUCKET_NR(c, k, ptr)];
-}
-
-static inline struct bucket *PTR_BUCKET(struct cache_set *c,
- struct cache *ca,
- const struct bkey *k,
- unsigned ptr)
-{
- return ca->buckets + PTR_BUCKET_NR(c, k, ptr);
-}
-
-static inline uint8_t gen_after(uint8_t a, uint8_t b)
-{
- uint8_t r = a - b;
- return r > 128U ? 0 : r;
-}
-
-static inline u8 ptr_stale(struct cache_set *c, struct cache *ca,
- const struct bkey *k, unsigned ptr)
-{
- return gen_after(PTR_BUCKET_GEN(c, ca, k, ptr), PTR_GEN(k, ptr));
-}
-
-/* checksumming */
-
-u64 bch_checksum_update(unsigned, u64, const void *, size_t);
-u64 bch_checksum(unsigned, const void *, size_t);
-
-/*
- * This is used for various on disk data structures - cache_sb, prio_set, bset,
- * jset: The checksum is _always_ the first 8 bytes of these structs
- */
-#define csum_set(i, type) \
-({ \
- void *start = ((void *) (i)) + sizeof(u64); \
- void *end = bset_bkey_last(i); \
- \
- bch_checksum(type, start, end - start); \
-})
-
/* Error handling macros */
#define btree_bug(b, ...) \
@@ -1066,153 +731,8 @@ do { \
bch_cache_set_error(c, __VA_ARGS__); \
} while (0)
-/* Looping macros */
-
-static inline struct cache *bch_next_cache_rcu(struct cache_set *c,
- unsigned *iter)
-{
- struct cache *ret = NULL;
-
- while (*iter < c->sb.nr_in_set &&
- !(ret = rcu_dereference(c->cache[*iter])))
- (*iter)++;
-
- return ret;
-}
-
-#define for_each_cache_rcu(ca, c, iter) \
- for ((iter) = 0; ((ca) = bch_next_cache_rcu((c), &(iter))); (iter)++)
-
-static inline struct cache *bch_get_next_cache(struct cache_set *c,
- unsigned *iter)
-{
- struct cache *ret;
-
- rcu_read_lock();
- if ((ret = bch_next_cache_rcu(c, iter)))
- percpu_ref_get(&ret->ref);
- rcu_read_unlock();
-
- return ret;
-}
-
-/*
- * If you break early, you must drop your ref on the current cache
- */
-#define for_each_cache(ca, c, iter) \
- for ((iter) = 0; \
- (ca = bch_get_next_cache(c, &(iter))); \
- percpu_ref_put(&ca->ref), (iter)++)
-
-#define for_each_bucket(b, ca) \
- for (b = (ca)->buckets + (ca)->sb.first_bucket; \
- b < (ca)->buckets + (ca)->sb.nbuckets; b++)
-
-static inline void cached_dev_put(struct cached_dev *dc)
-{
- if (atomic_dec_and_test(&dc->count))
- schedule_work(&dc->detach);
-}
-
-static inline bool cached_dev_get(struct cached_dev *dc)
-{
- if (!atomic_inc_not_zero(&dc->count))
- return false;
-
- /* Paired with the mb in cached_dev_attach */
- smp_mb__after_atomic();
- return true;
-}
-
-static inline u64 bcache_dev_inum(struct bcache_device *d)
-{
- return KEY_INODE(&d->inode.i_inode.i_key);
-}
-
-static inline struct bcache_device *bch_dev_find(struct cache_set *c, u64 inode)
-{
- return radix_tree_lookup(&c->devices, inode);
-}
-
-#define kobj_attribute_write(n, fn) \
- static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
-
-#define kobj_attribute_rw(n, show, store) \
- static struct kobj_attribute ksysfs_##n = \
- __ATTR(n, S_IWUSR|S_IRUSR, show, store)
-
-/* superblock */
-
-void bch_check_mark_super_slowpath(struct cache_set *, struct bkey *, bool);
-
-static inline bool bch_check_super_marked(struct cache_set *c,
- struct bkey *k, bool meta)
-{
- unsigned ptr;
- struct cache_member *mi;
-
- for (ptr = 0; ptr < bch_extent_ptrs(k); ptr++) {
- mi = c->members + PTR_DEV(k, ptr);
-
- if (!(meta ? CACHE_HAS_METADATA : CACHE_HAS_DATA)(mi))
- return false;
- }
-
- return true;
-}
-
-static inline void bch_check_mark_super(struct cache_set *c,
- struct bkey *k, bool meta)
-{
- if (bch_check_super_marked(c, k, meta))
- return;
-
- bch_check_mark_super_slowpath(c, k, meta);
-}
-
/* Forward declarations */
-__printf(2, 3)
-bool bch_cache_set_error(struct cache_set *, const char *, ...);
-
-void bch_prio_write(struct cache *);
-void bch_write_bdev_super(struct cached_dev *, struct closure *);
-
-extern struct workqueue_struct *bcache_io_wq;
-extern struct mutex bch_register_lock;
-extern struct list_head bch_cache_sets;
-
-extern struct kobj_type bch_cached_dev_ktype;
-extern struct kobj_type bch_flash_dev_ktype;
-extern struct kobj_type bch_cache_set_ktype;
-extern struct kobj_type bch_cache_set_internal_ktype;
-extern struct kobj_type bch_cache_ktype;
-
-void bch_cached_dev_release(struct kobject *);
-void bch_flash_dev_release(struct kobject *);
-void bch_cache_set_release(struct kobject *);
-void bch_cache_release(struct kobject *);
-
-int bch_super_realloc(struct cache *, unsigned);
-void bcache_write_super(struct cache_set *);
-
-int bch_flash_dev_create(struct cache_set *, u64);
-
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
-void bch_cached_dev_detach(struct cached_dev *);
-void bch_cached_dev_run(struct cached_dev *);
-void bcache_device_stop(struct bcache_device *);
-
-void bch_cache_set_unregister(struct cache_set *);
-void bch_cache_set_stop(struct cache_set *);
-
-void bch_cache_read_only(struct cache *);
-const char *bch_cache_read_write(struct cache *);
-void bch_cache_remove(struct cache *);
-int bch_cache_add(struct cache_set *, const char *);
-
-void bch_btree_cache_free(struct cache_set *);
-int bch_btree_cache_alloc(struct cache_set *);
void bch_tiering_init_cache_set(struct cache_set *);
int bch_tiering_thread_start(struct cache_set *c);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index d1bd7b4bfae6..2d22463bd977 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -29,6 +29,7 @@
#include "io.h"
#include "journal.h"
#include "movinggc.h"
+#include "super.h"
#include "writeback.h"
#include <linux/slab.h>
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 4eb60cd5baff..4bc176b4d763 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -171,6 +171,21 @@ static inline void set_gc_sectors(struct cache_set *c)
atomic64_set(&c->sectors_until_gc, c->capacity / 16);
}
+static inline size_t btree_bytes(struct cache_set *c)
+{
+ return c->btree_pages * PAGE_SIZE;
+}
+
+static inline unsigned btree_blocks(struct btree *b)
+{
+ return KEY_SIZE(&b->key) >> b->c->block_bits;
+}
+
+static inline unsigned btree_default_blocks(struct cache_set *c)
+{
+ return (PAGE_SECTORS * c->btree_pages) >> c->block_bits;
+}
+
/* Looping macros */
#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \
@@ -254,6 +269,9 @@ int bch_initial_gc(struct cache_set *, struct list_head *);
void bch_mark_keybuf_keys(struct cache_set *, struct keybuf *);
u8 __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
+void bch_btree_cache_free(struct cache_set *);
+int bch_btree_cache_alloc(struct cache_set *);
+
/* Return values from @fn parameter to map_keys and map_nodes */
#define MAP_DONE 0 /* We're done */
#define MAP_CONTINUE 1 /* Continue and advance the iterator */
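The three macros dropped from bcache.h return here as typed inline functions with unchanged arithmetic. A worked example of the unit conversions, assuming 4 KiB pages, 4 KiB btree blocks (block_bits = 3) and btree_pages = 64 - all of these values are illustrative, not taken from the patch:

	#include <stdio.h>

	#define PAGE_SIZE	4096
	#define PAGE_SECTORS	(PAGE_SIZE / 512)	/* 8 sectors per page */

	int main(void)
	{
		unsigned btree_pages = 64;		/* assumed cache_set setting */
		unsigned block_bits  = 3;		/* 4 KiB block = 8 sectors = 1 << 3 */

		printf("btree_bytes          = %u\n", btree_pages * PAGE_SIZE);	/* 262144 */
		printf("btree_default_blocks = %u\n",
		       (PAGE_SECTORS * btree_pages) >> block_bits);			/* 64 */
		return 0;
	}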
diff --git a/drivers/md/bcache/buckets.h b/drivers/md/bcache/buckets.h
index 6d2ee701b7f7..65575b47ff82 100644
--- a/drivers/md/bcache/buckets.h
+++ b/drivers/md/bcache/buckets.h
@@ -7,7 +7,88 @@
#ifndef _BUCKETS_H
#define _BUCKETS_H
-#include "bcache.h"
+#include "buckets_types.h"
+#include "super.h"
+
+#define for_each_bucket(b, ca) \
+ for (b = (ca)->buckets + (ca)->sb.first_bucket; \
+ b < (ca)->buckets + (ca)->sb.nbuckets; b++)
+
+static inline struct cache *PTR_CACHE(struct cache_set *c,
+ const struct bkey *k,
+ unsigned ptr)
+{
+ unsigned dev = PTR_DEV(k, ptr);
+
+ return dev < MAX_CACHES_PER_SET
+ ? rcu_dereference(c->cache[dev])
+ : NULL;
+}
+
+static inline size_t PTR_BUCKET_NR(struct cache_set *c,
+ const struct bkey *k,
+ unsigned ptr)
+{
+ return sector_to_bucket(c, PTR_OFFSET(k, ptr));
+}
+
+static inline u8 PTR_BUCKET_GEN(struct cache_set *c,
+ struct cache *ca,
+ const struct bkey *k,
+ unsigned ptr)
+{
+ return ca->bucket_gens[PTR_BUCKET_NR(c, k, ptr)];
+}
+
+static inline struct bucket *PTR_BUCKET(struct cache_set *c,
+ struct cache *ca,
+ const struct bkey *k,
+ unsigned ptr)
+{
+ return ca->buckets + PTR_BUCKET_NR(c, k, ptr);
+}
+
+static inline u8 gen_after(u8 a, u8 b)
+{
+ u8 r = a - b;
+
+ return r > 128U ? 0 : r;
+}
+
+static inline u8 ptr_stale(struct cache_set *c, struct cache *ca,
+ const struct bkey *k, unsigned ptr)
+{
+ return gen_after(PTR_BUCKET_GEN(c, ca, k, ptr), PTR_GEN(k, ptr));
+}
+
+/* bucket heaps */
+
+static inline bool bucket_min_cmp(struct bucket_heap_entry l,
+ struct bucket_heap_entry r)
+{
+ return l.val < r.val;
+}
+
+static inline bool bucket_max_cmp(struct bucket_heap_entry l,
+ struct bucket_heap_entry r)
+{
+ return l.val > r.val;
+}
+
+static inline void bucket_heap_push(struct cache *ca, struct bucket *g,
+ unsigned long val)
+{
+ struct bucket_heap_entry new = { g, val };
+
+ if (!heap_full(&ca->heap))
+ heap_add(&ca->heap, new, bucket_min_cmp);
+ else if (bucket_min_cmp(new, heap_peek(&ca->heap))) {
+ ca->heap.data[0] = new;
+ heap_sift(&ca->heap, 0, bucket_min_cmp);
+ }
+}
+
+/* bucket gc marks */
/* The dirty and cached sector counts saturate. If this occurs,
* reference counting alone will not free the bucket, and a btree
@@ -141,4 +222,4 @@ u8 bch_mark_data_bucket(struct cache_set *, struct cache *, struct bkey *,
unsigned, int, bool, bool);
void bch_unmark_open_bucket(struct cache *, struct bucket *);
-#endif
+#endif /* _BUCKETS_H */
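gen_after() and ptr_stale(), now exported from buckets.h, compare 8-bit bucket generations modulo 256: differences above 128 are treated as "not after", so the comparison survives wraparound and a pointer whose generation is ahead of the bucket's never reads as stale. A standalone userspace demonstration of the same arithmetic (illustrative only):

	#include <assert.h>
	#include <stdint.h>

	/* Same arithmetic as gen_after(): 8-bit generations compared modulo 256. */
	static uint8_t gen_after(uint8_t a, uint8_t b)
	{
		uint8_t r = a - b;

		return r > 128U ? 0 : r;
	}

	int main(void)
	{
		assert(gen_after(200, 198) == 2);	/* bucket reused twice since the key was written */
		assert(gen_after(3, 250) == 9);		/* still counts correctly across the 255 -> 0 wrap */
		assert(gen_after(10, 20) == 0);		/* pointer gen ahead of bucket gen: not stale */
		return 0;
	}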
diff --git a/drivers/md/bcache/buckets_types.h b/drivers/md/bcache/buckets_types.h
new file mode 100644
index 000000000000..ea63ce843150
--- /dev/null
+++ b/drivers/md/bcache/buckets_types.h
@@ -0,0 +1,49 @@
+#ifndef _BUCKETS_TYPES_H
+#define _BUCKETS_TYPES_H
+
+struct bucket_mark {
+ union {
+ struct {
+ u32 counter;
+ };
+
+ struct {
+ unsigned owned_by_allocator:1;
+ unsigned cached_sectors:15;
+ unsigned is_metadata:1;
+ unsigned dirty_sectors:15;
+ };
+ };
+};
+
+struct bucket {
+ union {
+ struct {
+ u16 read_prio;
+ u16 write_prio;
+ };
+ u16 prio[2];
+ };
+ struct bucket_mark mark;
+ u8 last_gc; /* Most out of date gen in the btree */
+
+ /* generation copygc is going to move this bucket into */
+ u8 copygc_gen;
+};
+
+struct bucket_stats {
+ u64 buckets_dirty;
+ u64 buckets_cached;
+ u64 buckets_meta;
+ u64 buckets_alloc;
+
+ u64 sectors_dirty;
+ u64 sectors_cached;
+};
+
+struct bucket_heap_entry {
+ struct bucket *g;
+ unsigned long val;
+};
+
+#endif /* _BUCKETS_TYPES_H */
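struct bucket_mark packs 1 + 15 + 1 + 15 bits into a single 32-bit word overlaid by the counter member, which is what lets callers snapshot or update every field with one 32-bit access. A userspace sketch of that pattern - the cmpxchg mentioned in the comment is the usual way such a mark gets committed, not something added by this patch:

	#include <stdint.h>
	#include <stdio.h>

	struct mark {			/* mirrors struct bucket_mark */
		union {
			uint32_t counter;
			struct {
				unsigned owned_by_allocator:1;
				unsigned cached_sectors:15;
				unsigned is_metadata:1;
				unsigned dirty_sectors:15;
			};
		};
	};

	int main(void)
	{
		struct mark old, new;

		old.counter = 0;
		old.dirty_sectors = 100;

		new.counter = old.counter;	/* snapshot all four fields in one 32-bit read */
		new.dirty_sectors += 8;		/* modify the copy... */

		/* ...then commit with e.g. cmpxchg(&g->mark.counter, old.counter, new.counter) */
		printf("sizeof = %zu, dirty %u -> %u\n",
		       sizeof(struct mark), old.dirty_sectors, new.dirty_sectors);
		return 0;
	}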
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index eb914f58f31d..3cc0df7dda80 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -7,10 +7,12 @@
#include "bcache.h"
#include "btree.h"
+#include "buckets.h"
#include "debug.h"
#include "extents.h"
#include "io.h"
#include "keybuf.h"
+#include "super.h"
#include <linux/console.h>
#include <linux/debugfs.h>
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 2dbd272218ad..34149496a2e5 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -10,6 +10,7 @@
#include "debug.h"
#include "extents.h"
#include "inode.h"
+#include "super.h"
#include "writeback.h"
#include <trace/events/bcache.h>
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 6432508e0e99..4cc5daedb904 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -9,11 +9,13 @@
#include "alloc.h"
#include "bset.h"
#include "btree.h"
+#include "buckets.h"
#include "debug.h"
#include "extents.h"
#include "io.h"
#include "keybuf.h"
#include "stats.h"
+#include "super.h"
#include <linux/blkdev.h>
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
index d8b771876690..3daa7d498428 100644
--- a/drivers/md/bcache/io.h
+++ b/drivers/md/bcache/io.h
@@ -69,4 +69,6 @@ bool cache_promote(struct cache_set *, struct bbio *, struct bkey *, unsigned);
void bch_read_race_work(struct work_struct *work);
+extern struct workqueue_struct *bcache_io_wq;
+
#endif /* _BCACHE_IO_H */
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 770b72755641..5cb0c0231d9f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -5,11 +5,13 @@
*/
#include "bcache.h"
+#include "buckets.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include "io.h"
#include "journal.h"
+#include "super.h"
#include <trace/events/bcache.h>
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index 3dabd16d87f1..091caed980bc 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -107,6 +107,8 @@
* nodes that are pinning the oldest journal entries first.
*/
+#include "journal_types.h"
+
static inline struct jset_keys *jset_keys_next(struct jset_keys *j)
{
return (void *) (&j->d[j->keys]);
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
new file mode 100644
index 000000000000..a18f8bff5f85
--- /dev/null
+++ b/drivers/md/bcache/journal_types.h
@@ -0,0 +1,85 @@
+#ifndef _BCACHE_JOURNAL_TYPES_H
+#define _BCACHE_JOURNAL_TYPES_H
+
+/*
+ * We put two of these in struct journal; we used them for writes to the
+ * journal that are being staged or in flight.
+ */
+struct journal_write {
+ struct jset *data;
+#define JSET_BITS 5
+
+ struct cache_set *c;
+ struct closure_waitlist wait;
+};
+
+/* Embedded in struct cache_set */
+struct journal {
+ unsigned long flags;
+#define JOURNAL_NEED_WRITE 0
+#define JOURNAL_DIRTY 1
+#define JOURNAL_REPLAY_DONE 2
+ atomic_t in_flight;
+
+ spinlock_t lock;
+
+ unsigned u64s_remaining;
+ unsigned res_count;
+
+ /* Number of blocks free in the bucket(s) we're currently writing to */
+ unsigned blocks_free;
+
+ /* used when waiting because the journal was full */
+ wait_queue_head_t wait;
+ struct closure io;
+ struct delayed_work work;
+
+ unsigned delay_ms;
+
+ u64 seq;
+ DECLARE_FIFO(atomic_t, pin);
+
+ BKEY_PADDED(key);
+
+ struct journal_write w[2], *cur;
+};
+
+/*
+ * Embedded in struct cache. First three fields refer to the array of journal
+ * buckets, in cache_sb.
+ */
+struct journal_device {
+ /*
+ * For each journal bucket, contains the max sequence number of the
+ * journal writes it contains - so we know when a bucket can be reused.
+ */
+ u64 *seq;
+
+ /* Journal bucket we're currently writing to */
+ unsigned cur_idx;
+
+ /* Last journal bucket that still contains an open journal entry */
+ unsigned last_idx;
+
+ /* Next journal bucket to be discarded */
+ unsigned discard_idx;
+
+#define DISCARD_READY 0
+#define DISCARD_IN_FLIGHT 1
+#define DISCARD_DONE 2
+ /* 1 - discard in flight, -1 - discard completed */
+ atomic_t discard_in_flight;
+
+ struct work_struct discard_work;
+ struct bio discard_bio;
+ struct bio_vec discard_bv;
+
+ /* Bio for journal reads/writes to this device */
+ struct bio bio;
+ struct bio_vec bv[1 << JSET_BITS];
+
+ /* for bch_journal_read_device */
+ struct closure read;
+};
+
+#endif /* _BCACHE_JOURNAL_TYPES_H */
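JSET_BITS = 5 fixes the in-memory journal write buffer at 1 << 5 pages, and struct journal_device sizes bv[] to match, one bio_vec per page. A small illustration of the sizes that implies, assuming 4 KiB pages (the page size is not stated in the patch):

	#include <stdio.h>

	#define JSET_BITS	5
	#define JSET_PAGES	(1u << JSET_BITS)	/* 32 pages per journal write buffer */

	int main(void)
	{
		unsigned page_size = 4096;		/* assumed; architecture dependent */

		printf("journal write buffer: %u pages = %u KiB\n",
		       JSET_PAGES, JSET_PAGES * page_size / 1024);	/* 32 pages = 128 KiB */
		printf("bio_vecs per journal_device bio: %u\n", JSET_PAGES);
		return 0;
	}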
diff --git a/drivers/md/bcache/keybuf.h b/drivers/md/bcache/keybuf.h
index 17f3f18674a4..34ca18231b0e 100644
--- a/drivers/md/bcache/keybuf.h
+++ b/drivers/md/bcache/keybuf.h
@@ -1,10 +1,7 @@
#ifndef _BCACHE_KEYBUF_H
#define _BCACHE_KEYBUF_H
-struct bkey;
-struct cache_set;
-struct keybuf;
-struct keybuf_key;
+#include "keybuf_types.h"
typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
diff --git a/drivers/md/bcache/keybuf_types.h b/drivers/md/bcache/keybuf_types.h
new file mode 100644
index 000000000000..7c1293071cda
--- /dev/null
+++ b/drivers/md/bcache/keybuf_types.h
@@ -0,0 +1,30 @@
+#ifndef _BCACHE_KEYBUF_TYPES_H
+#define _BCACHE_KEYBUF_TYPES_H
+
+struct keybuf_key {
+ struct rb_node node;
+ BKEY_PADDED(key);
+ atomic_t ref;
+};
+
+struct keybuf {
+ struct bkey last_scanned;
+ spinlock_t lock;
+
+ /*
+ * Beginning and end of range in rb tree - so that we can skip taking
+ * lock and checking the rb tree when we need to check for overlapping
+ * keys.
+ */
+ struct bkey start;
+ struct bkey end;
+
+ struct rb_root keys;
+
+ struct semaphore in_flight;
+
+#define KEYBUF_NR 500
+ DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
+};
+
+#endif /* _BCACHE_KEYBUF_TYPES_H */
diff --git a/drivers/md/bcache/move.c b/drivers/md/bcache/move.c
index 23a0372fba69..8f24b2359d44 100644
--- a/drivers/md/bcache/move.c
+++ b/drivers/md/bcache/move.c
@@ -1,6 +1,7 @@
#include "bcache.h"
#include "btree.h"
+#include "buckets.h"
#include "extents.h"
#include "io.h"
#include "keybuf.h"
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 6ad3b54f5bfa..05250bbe792a 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -10,6 +10,7 @@
#include "alloc.h"
#include "btree.h"
#include "debug.h"
+#include "inode.h"
#include "io.h"
#include "journal.h"
#include "movinggc.h"
@@ -17,6 +18,7 @@
#include "writeback.h"
#include <linux/blkdev.h>
+#include <linux/crc32c.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
#include <linux/idr.h>
@@ -2441,6 +2443,13 @@ err:
/* Global interfaces/init */
+#define kobj_attribute_write(n, fn) \
+ static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
+
+#define kobj_attribute_rw(n, show, store) \
+ static struct kobj_attribute ksysfs_##n = \
+ __ATTR(n, S_IWUSR|S_IRUSR, show, store)
+
static ssize_t register_bcache(struct kobject *, struct kobj_attribute *,
const char *, size_t);
diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h
new file mode 100644
index 000000000000..455cf1bdb292
--- /dev/null
+++ b/drivers/md/bcache/super.h
@@ -0,0 +1,174 @@
+#ifndef _BCACHE_SUPER_H
+#define _BCACHE_SUPER_H
+
+#include "extents.h"
+
+static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
+{
+ return s >> c->bucket_bits;
+}
+
+static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
+{
+ return ((sector_t) b) << c->bucket_bits;
+}
+
+static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
+{
+ return s & (c->sb.bucket_size - 1);
+}
+
+static inline struct cache_member *cache_member_info(struct cache *ca)
+{
+ return ca->set->members + ca->sb.nr_this_dev;
+}
+
+static inline struct cache *bch_next_cache_rcu(struct cache_set *c,
+ unsigned *iter)
+{
+ struct cache *ret = NULL;
+
+ while (*iter < c->sb.nr_in_set &&
+ !(ret = rcu_dereference(c->cache[*iter])))
+ (*iter)++;
+
+ return ret;
+}
+
+#define for_each_cache_rcu(ca, c, iter) \
+ for ((iter) = 0; ((ca) = bch_next_cache_rcu((c), &(iter))); (iter)++)
+
+static inline struct cache *bch_get_next_cache(struct cache_set *c,
+ unsigned *iter)
+{
+ struct cache *ret;
+
+ rcu_read_lock();
+ if ((ret = bch_next_cache_rcu(c, iter)))
+ percpu_ref_get(&ret->ref);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/*
+ * If you break early, you must drop your ref on the current cache
+ */
+#define for_each_cache(ca, c, iter) \
+ for ((iter) = 0; \
+ (ca = bch_get_next_cache(c, &(iter))); \
+ percpu_ref_put(&ca->ref), (iter)++)
+
+static inline void cached_dev_put(struct cached_dev *dc)
+{
+ if (atomic_dec_and_test(&dc->count))
+ schedule_work(&dc->detach);
+}
+
+static inline bool cached_dev_get(struct cached_dev *dc)
+{
+ if (!atomic_inc_not_zero(&dc->count))
+ return false;
+
+ /* Paired with the mb in cached_dev_attach */
+ smp_mb__after_atomic();
+ return true;
+}
+
+static inline u64 bcache_dev_inum(struct bcache_device *d)
+{
+ return KEY_INODE(&d->inode.i_inode.i_key);
+}
+
+static inline struct bcache_device *bch_dev_find(struct cache_set *c,
+ u64 inode)
+{
+ return radix_tree_lookup(&c->devices, inode);
+}
+
+__printf(2, 3)
+bool bch_cache_set_error(struct cache_set *, const char *, ...);
+
+u64 bch_checksum_update(unsigned, u64, const void *, size_t);
+u64 bch_checksum(unsigned, const void *, size_t);
+
+/*
+ * This is used for various on disk data structures - cache_sb, prio_set, bset,
+ * jset: The checksum is _always_ the first 8 bytes of these structs
+ */
+#define csum_set(i, type) \
+({ \
+ void *start = ((void *) (i)) + sizeof(u64); \
+ void *end = bset_bkey_last(i); \
+ \
+ bch_checksum(type, start, end - start); \
+})
+
+void bch_prio_write(struct cache *);
+
+void bch_check_mark_super_slowpath(struct cache_set *, struct bkey *, bool);
+
+static inline bool bch_check_super_marked(struct cache_set *c,
+ struct bkey *k, bool meta)
+{
+ unsigned ptr;
+ struct cache_member *mi;
+
+ for (ptr = 0; ptr < bch_extent_ptrs(k); ptr++) {
+ mi = c->members + PTR_DEV(k, ptr);
+
+ if (!(meta ? CACHE_HAS_METADATA : CACHE_HAS_DATA)(mi))
+ return false;
+ }
+
+ return true;
+}
+
+static inline void bch_check_mark_super(struct cache_set *c,
+ struct bkey *k, bool meta)
+{
+ if (bch_check_super_marked(c, k, meta))
+ return;
+
+ bch_check_mark_super_slowpath(c, k, meta);
+}
+
+int bch_super_realloc(struct cache *, unsigned);
+void bcache_write_super(struct cache_set *);
+
+void bch_write_bdev_super(struct cached_dev *, struct closure *);
+
+void bch_cached_dev_release(struct kobject *);
+void bch_flash_dev_release(struct kobject *);
+void bch_cache_set_release(struct kobject *);
+void bch_cache_release(struct kobject *);
+
+void bch_cache_set_unregister(struct cache_set *);
+void bch_cache_set_stop(struct cache_set *);
+
+const char *register_bcache_devices(char **, int, struct cache_set **);
+const char *bch_run_cache_set(struct cache_set *);
+
+int bch_flash_dev_create(struct cache_set *, u64);
+
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+void bch_cached_dev_detach(struct cached_dev *);
+void bch_cached_dev_run(struct cached_dev *);
+void bcache_device_stop(struct bcache_device *);
+
+void bch_cache_read_only(struct cache *);
+const char *bch_cache_read_write(struct cache *);
+void bch_cache_remove(struct cache *);
+int bch_cache_add(struct cache_set *, const char *);
+
+extern struct mutex bch_register_lock;
+extern struct list_head bch_cache_sets;
+extern struct idr bch_cache_set_minor;
+
+extern struct kobj_type bch_cached_dev_ktype;
+extern struct kobj_type bch_flash_dev_ktype;
+extern struct kobj_type bch_cache_set_ktype;
+extern struct kobj_type bch_cache_set_internal_ktype;
+extern struct kobj_type bch_cache_ktype;
+
+#endif /* _BCACHE_SUPER_H */
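csum_set() skips the first 8 bytes because, for cache_sb, prio_set, bset and jset alike, that leading u64 is the stored checksum itself; everything from there up to bset_bkey_last() is covered. A self-contained sketch of the same convention using a toy structure and a stand-in checksum (FNV-1a here; bch_checksum() is the real routine):

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Toy on-disk layout: like bset/jset, the first 8 bytes hold the checksum. */
	struct toy_set {
		uint64_t csum;
		uint64_t seq;
		uint64_t d[4];
	};

	/* Stand-in for bch_checksum(); any 64-bit checksum illustrates the point. */
	static uint64_t toy_checksum(const void *p, size_t len)
	{
		const uint8_t *b = p;
		uint64_t h = 0xcbf29ce484222325ULL;

		while (len--)
			h = (h ^ *b++) * 0x100000001b3ULL;
		return h;
	}

	/* Checksum everything after the leading u64, as csum_set() does. */
	static uint64_t toy_csum_set(const struct toy_set *s)
	{
		const void *start = (const char *) s + sizeof(uint64_t);
		const void *end   = &s->d[4];		/* i.e. bset_bkey_last() */

		return toy_checksum(start, (const char *) end - (const char *) start);
	}

	int main(void)
	{
		struct toy_set s = { .seq = 1, .d = { 10, 20, 30, 40 } };

		s.csum = toy_csum_set(&s);		/* written to disk with the structure */
		assert(s.csum == toy_csum_set(&s));	/* verified the same way on read */
		return 0;
	}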
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 83076355847d..2883f36ec6d6 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -9,8 +9,10 @@
#include "alloc.h"
#include "sysfs.h"
#include "btree.h"
+#include "inode.h"
#include "journal.h"
#include "request.h"
+#include "super.h"
#include "writeback.h"
#include <linux/blkdev.h>
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 10bdc00a7933..a29e87278414 100644
--- a/drivers/md/bcache/trace.c
+++ b/drivers/md/bcache/trace.c
@@ -1,5 +1,6 @@
#include "bcache.h"
#include "btree.h"
+#include "buckets.h"
#include <linux/blktrace_api.h>