author    Kent Overstreet <kent.overstreet@gmail.com>	2020-06-24 14:27:48 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>	2020-06-28 18:12:46 -0400
commit    7b42d65c0607af212b9ef3aa834ea449e3f846b1 (patch)
tree      44b973d839aacebf7564169a010d2c7a9e1c88cf
parent    919b1e2379b8f5ae68ddb4a63ca208373bae9fdc (diff)
bcache: Header refactoring
For the bcache + bcachefs layer, we're going to need to reorganize the headers so that we can have code that interfaces with bcachefs as well as with parts of bcache.
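As a rough sketch of the layering this makes possible (the consumer file below is hypothetical, not part of this patch): code that only deals with the backing device side can now include the narrower headers instead of pulling in all of bcache.h:

	/* hypothetical consumer of the refactored headers */
	#include <linux/bcache_superblock.h>	/* struct cache_sb, BDEV_* bitfields */

	#include "backingdev.h"		/* struct bcache_device, struct cached_dev */
	#include "io.h"			/* bch_check_should_bypass(), bbio helpers */
	#include "super.h"		/* bch_write_bdev_super(), attach/detach/stop */

	static void example_mark_dirty(struct cached_dev *dc)
	{
		/* refcount and writeback helpers that used to live in bcache.h/writeback.h */
		if (cached_dev_get(dc)) {
			bch_writeback_add(dc);
			cached_dev_put(dc);
		}
	}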
-rw-r--r--  drivers/md/bcache/backingdev.h          | 251
-rw-r--r--  drivers/md/bcache/bcache.h              | 205
-rw-r--r--  drivers/md/bcache/btree.c               |   1
-rw-r--r--  drivers/md/bcache/debug.c               |   1
-rw-r--r--  drivers/md/bcache/io.c                  | 168
-rw-r--r--  drivers/md/bcache/io.h                  |  26
-rw-r--r--  drivers/md/bcache/movinggc.c            |   1
-rw-r--r--  drivers/md/bcache/request.c             | 152
-rw-r--r--  drivers/md/bcache/request.h             |   1
-rw-r--r--  drivers/md/bcache/super.c               |   2
-rw-r--r--  drivers/md/bcache/super.h               |  34
-rw-r--r--  drivers/md/bcache/sysfs.c               |   1
-rw-r--r--  drivers/md/bcache/writeback.c           |   1
-rw-r--r--  drivers/md/bcache/writeback.h           |  58
-rw-r--r--  include/uapi/linux/bcache.h             | 171
-rw-r--r--  include/uapi/linux/bcache_superblock.h  | 177
16 files changed, 667 insertions(+), 583 deletions(-)
diff --git a/drivers/md/bcache/backingdev.h b/drivers/md/bcache/backingdev.h
new file mode 100644
index 000000000000..58362eb7902a
--- /dev/null
+++ b/drivers/md/bcache/backingdev.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHE_BACKINGDEV_H
+#define _BCACHE_BACKINGDEV_H
+
+#include <linux/bio.h>
+#include <linux/closure.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+
+#include <linux/bcache_superblock.h>
+#include <linux/bcache/ratelimit.h>
+
+#include "stats.h"
+#include "super.h"
+
+struct search;
+struct btree;
+
+struct bcache_device {
+ struct closure cl;
+
+ struct kobject kobj;
+
+ struct cache_set *c;
+ unsigned int id;
+#define BCACHEDEVNAME_SIZE 12
+ char name[BCACHEDEVNAME_SIZE];
+
+ struct gendisk *disk;
+
+ unsigned long flags;
+#define BCACHE_DEV_CLOSING 0
+#define BCACHE_DEV_DETACHING 1
+#define BCACHE_DEV_UNLINK_DONE 2
+#define BCACHE_DEV_WB_RUNNING 3
+#define BCACHE_DEV_RATE_DW_RUNNING 4
+ unsigned int nr_stripes;
+ unsigned int stripe_size;
+ atomic_t *stripe_sectors_dirty;
+ unsigned long *full_dirty_stripes;
+
+ struct bio_set bio_split;
+
+ unsigned int data_csum:1;
+
+ int (*cache_miss)(struct btree *b, struct search *s,
+ struct bio *bio, unsigned int sectors);
+ int (*ioctl)(struct bcache_device *d, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+};
+
+enum stop_on_failure {
+ BCH_CACHED_DEV_STOP_AUTO = 0,
+ BCH_CACHED_DEV_STOP_ALWAYS,
+ BCH_CACHED_DEV_STOP_MODE_MAX,
+};
+
+struct io {
+ /* Used to track sequential IO so it can be skipped */
+ struct hlist_node hash;
+ struct list_head lru;
+
+ unsigned long jiffies;
+ unsigned int sequential;
+ sector_t last;
+};
+
+struct cached_dev {
+ struct list_head list;
+ struct bcache_device disk;
+ struct block_device *bdev;
+
+ struct cache_sb sb;
+ struct cache_sb_disk *sb_disk;
+ struct bio sb_bio;
+ struct bio_vec sb_bv[1];
+ struct closure sb_write;
+ struct semaphore sb_write_mutex;
+
+ /* Refcount on the cache set. Always nonzero when we're caching. */
+ refcount_t count;
+ struct work_struct detach;
+
+ /*
+ * Device might not be running if it's dirty and the cache set hasn't
+ * shown up yet.
+ */
+ atomic_t running;
+
+ /*
+ * Writes take a shared lock from start to finish; scanning for dirty
+ * data to refill the rb tree requires an exclusive lock.
+ */
+ struct rw_semaphore writeback_lock;
+
+ /*
+ * Nonzero, and writeback has a refcount (d->count), iff there is dirty
+ * data in the cache. Protected by writeback_lock; must have a
+ * shared lock to set and an exclusive lock to clear.
+ */
+ atomic_t has_dirty;
+
+#define BCH_CACHE_READA_ALL 0
+#define BCH_CACHE_READA_META_ONLY 1
+ unsigned int cache_readahead_policy;
+ struct bch_ratelimit writeback_rate;
+ struct delayed_work writeback_rate_update;
+
+ /* Limit number of writeback bios in flight */
+ struct semaphore in_flight;
+ struct task_struct *writeback_thread;
+ struct workqueue_struct *writeback_write_wq;
+
+ struct keybuf *writeback_keys;
+
+ struct task_struct *status_update_thread;
+ /*
+ * Order the write-half of writeback operations strongly in dispatch
+ * order. (Maintain LBA order; don't allow reads completing out of
+ * order to re-order the writes...)
+ */
+ struct closure_waitlist writeback_ordering_wait;
+ atomic_t writeback_sequence_next;
+
+ /* For tracking sequential IO */
+#define RECENT_IO_BITS 7
+#define RECENT_IO (1 << RECENT_IO_BITS)
+ struct io io[RECENT_IO];
+ struct hlist_head io_hash[RECENT_IO + 1];
+ struct list_head io_lru;
+ spinlock_t io_lock;
+
+ struct cache_accounting accounting;
+
+ /* The rest of this all shows up in sysfs */
+ unsigned int sequential_cutoff;
+ unsigned int readahead;
+
+ unsigned int io_disable:1;
+ unsigned int verify:1;
+ unsigned int bypass_torture_test:1;
+
+ unsigned int partial_stripes_expensive:1;
+ unsigned int writeback_metadata:1;
+ unsigned int writeback_running:1;
+ unsigned char writeback_percent;
+ unsigned int writeback_delay;
+
+ uint64_t writeback_rate_target;
+ int64_t writeback_rate_proportional;
+ int64_t writeback_rate_integral;
+ int64_t writeback_rate_integral_scaled;
+ int32_t writeback_rate_change;
+
+ unsigned int writeback_rate_update_seconds;
+ unsigned int writeback_rate_i_term_inverse;
+ unsigned int writeback_rate_p_term_inverse;
+ unsigned int writeback_rate_minimum;
+
+ enum stop_on_failure stop_when_cache_set_failed;
+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
+ atomic_t io_errors;
+ unsigned int error_limit;
+ unsigned int offline_seconds;
+
+ char backing_dev_name[BDEVNAME_SIZE];
+};
+
+static inline unsigned int cache_mode(struct cached_dev *dc)
+{
+ return BDEV_CACHE_MODE(&dc->sb);
+}
+
+extern unsigned int bch_cutoff_writeback;
+extern unsigned int bch_cutoff_writeback_sync;
+
+static inline void cached_dev_put(struct cached_dev *dc)
+{
+ if (refcount_dec_and_test(&dc->count))
+ schedule_work(&dc->detach);
+}
+
+static inline bool cached_dev_get(struct cached_dev *dc)
+{
+ if (!refcount_inc_not_zero(&dc->count))
+ return false;
+
+ /* Paired with the mb in cached_dev_attach */
+ smp_mb__after_atomic();
+ return true;
+}
+
+static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
+{
+ uint64_t i, ret = 0;
+
+ for (i = 0; i < d->nr_stripes; i++)
+ ret += atomic_read(d->stripe_sectors_dirty + i);
+
+ return ret;
+}
+
+static inline unsigned int offset_to_stripe(struct bcache_device *d,
+ uint64_t offset)
+{
+ do_div(offset, d->stripe_size);
+ return offset;
+}
+
+static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
+ uint64_t offset,
+ unsigned int nr_sectors)
+{
+ unsigned int stripe = offset_to_stripe(&dc->disk, offset);
+
+ while (1) {
+ if (atomic_read(dc->disk.stripe_sectors_dirty + stripe))
+ return true;
+
+ if (nr_sectors <= dc->disk.stripe_size)
+ return false;
+
+ nr_sectors -= dc->disk.stripe_size;
+ stripe++;
+ }
+}
+
+static inline void bch_writeback_queue(struct cached_dev *dc)
+{
+ if (!IS_ERR_OR_NULL(dc->writeback_thread))
+ wake_up_process(dc->writeback_thread);
+}
+
+static inline void bch_writeback_add(struct cached_dev *dc)
+{
+ if (!atomic_read(&dc->has_dirty) &&
+ !atomic_xchg(&dc->has_dirty, 1)) {
+ if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
+ SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
+ /* XXX: should do this synchronously */
+ bch_write_bdev_super(dc, NULL);
+ }
+
+ bch_writeback_queue(dc);
+ }
+}
+
+#define CUTOFF_CACHE_ADD 95
+#define CUTOFF_CACHE_READA 90
+
+#endif /* _BCACHE_BACKINGDEV_H */
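The cached_dev refcount helpers move here unchanged; the usual pattern (a sketch, not a call site added by this patch) is to take a reference around an I/O and let the final put schedule the detach work:

	/* hypothetical I/O path; dc is assumed to be a valid struct cached_dev * */
	if (!cached_dev_get(dc))
		return;				/* device is detaching, don't start new work */

	/* ... submit the request against dc->disk ... */

	cached_dev_put(dc);			/* the last put schedules dc->detach */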
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 646ce2bacb3c..b779098c6aa1 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -192,6 +192,7 @@
#include <linux/workqueue.h>
#include <linux/kthread.h>
+#include "backingdev.h"
#include "bset.h"
#include "util.h"
@@ -247,156 +248,6 @@ struct keybuf {
DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
};
-struct bcache_device {
- struct closure cl;
-
- struct kobject kobj;
-
- struct cache_set *c;
- unsigned int id;
-#define BCACHEDEVNAME_SIZE 12
- char name[BCACHEDEVNAME_SIZE];
-
- struct gendisk *disk;
-
- unsigned long flags;
-#define BCACHE_DEV_CLOSING 0
-#define BCACHE_DEV_DETACHING 1
-#define BCACHE_DEV_UNLINK_DONE 2
-#define BCACHE_DEV_WB_RUNNING 3
-#define BCACHE_DEV_RATE_DW_RUNNING 4
- unsigned int nr_stripes;
- unsigned int stripe_size;
- atomic_t *stripe_sectors_dirty;
- unsigned long *full_dirty_stripes;
-
- struct bio_set bio_split;
-
- unsigned int data_csum:1;
-
- int (*cache_miss)(struct btree *b, struct search *s,
- struct bio *bio, unsigned int sectors);
- int (*ioctl)(struct bcache_device *d, fmode_t mode,
- unsigned int cmd, unsigned long arg);
-};
-
-struct io {
- /* Used to track sequential IO so it can be skipped */
- struct hlist_node hash;
- struct list_head lru;
-
- unsigned long jiffies;
- unsigned int sequential;
- sector_t last;
-};
-
-enum stop_on_failure {
- BCH_CACHED_DEV_STOP_AUTO = 0,
- BCH_CACHED_DEV_STOP_ALWAYS,
- BCH_CACHED_DEV_STOP_MODE_MAX,
-};
-
-struct cached_dev {
- struct list_head list;
- struct bcache_device disk;
- struct block_device *bdev;
-
- struct cache_sb sb;
- struct cache_sb_disk *sb_disk;
- struct bio sb_bio;
- struct bio_vec sb_bv[1];
- struct closure sb_write;
- struct semaphore sb_write_mutex;
-
- /* Refcount on the cache set. Always nonzero when we're caching. */
- refcount_t count;
- struct work_struct detach;
-
- /*
- * Device might not be running if it's dirty and the cache set hasn't
- * showed up yet.
- */
- atomic_t running;
-
- /*
- * Writes take a shared lock from start to finish; scanning for dirty
- * data to refill the rb tree requires an exclusive lock.
- */
- struct rw_semaphore writeback_lock;
-
- /*
- * Nonzero, and writeback has a refcount (d->count), iff there is dirty
- * data in the cache. Protected by writeback_lock; must have an
- * shared lock to set and exclusive lock to clear.
- */
- atomic_t has_dirty;
-
-#define BCH_CACHE_READA_ALL 0
-#define BCH_CACHE_READA_META_ONLY 1
- unsigned int cache_readahead_policy;
- struct bch_ratelimit writeback_rate;
- struct delayed_work writeback_rate_update;
-
- /* Limit number of writeback bios in flight */
- struct semaphore in_flight;
- struct task_struct *writeback_thread;
- struct workqueue_struct *writeback_write_wq;
-
- struct keybuf *writeback_keys;
-
- struct task_struct *status_update_thread;
- /*
- * Order the write-half of writeback operations strongly in dispatch
- * order. (Maintain LBA order; don't allow reads completing out of
- * order to re-order the writes...)
- */
- struct closure_waitlist writeback_ordering_wait;
- atomic_t writeback_sequence_next;
-
- /* For tracking sequential IO */
-#define RECENT_IO_BITS 7
-#define RECENT_IO (1 << RECENT_IO_BITS)
- struct io io[RECENT_IO];
- struct hlist_head io_hash[RECENT_IO + 1];
- struct list_head io_lru;
- spinlock_t io_lock;
-
- struct cache_accounting accounting;
-
- /* The rest of this all shows up in sysfs */
- unsigned int sequential_cutoff;
- unsigned int readahead;
-
- unsigned int io_disable:1;
- unsigned int verify:1;
- unsigned int bypass_torture_test:1;
-
- unsigned int partial_stripes_expensive:1;
- unsigned int writeback_metadata:1;
- unsigned int writeback_running:1;
- unsigned char writeback_percent;
- unsigned int writeback_delay;
-
- uint64_t writeback_rate_target;
- int64_t writeback_rate_proportional;
- int64_t writeback_rate_integral;
- int64_t writeback_rate_integral_scaled;
- int32_t writeback_rate_change;
-
- unsigned int writeback_rate_update_seconds;
- unsigned int writeback_rate_i_term_inverse;
- unsigned int writeback_rate_p_term_inverse;
- unsigned int writeback_rate_minimum;
-
- enum stop_on_failure stop_when_cache_set_failed;
-#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
- atomic_t io_errors;
- unsigned int error_limit;
- unsigned int offline_seconds;
-
- char backing_dev_name[BDEVNAME_SIZE];
-};
-
enum alloc_reserve {
RESERVE_BTREE,
RESERVE_PRIO,
@@ -876,22 +727,6 @@ do { \
for (b = (ca)->buckets + (ca)->sb.first_bucket; \
b < (ca)->buckets + (ca)->sb.nbuckets; b++)
-static inline void cached_dev_put(struct cached_dev *dc)
-{
- if (refcount_dec_and_test(&dc->count))
- schedule_work(&dc->detach);
-}
-
-static inline bool cached_dev_get(struct cached_dev *dc)
-{
- if (!refcount_inc_not_zero(&dc->count))
- return false;
-
- /* Paired with the mb in cached_dev_attach */
- smp_mb__after_atomic();
- return true;
-}
-
/*
* bucket_gc_gen() returns the difference between the bucket's current gen and
* the oldest gen of any pointer into that bucket in the btree (last_gc).
@@ -949,20 +784,6 @@ static inline void wait_for_kthread_stop(void)
/* Forward declarations */
-void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
-void bch_count_io_errors(struct cache *ca, blk_status_t error,
- int is_read, const char *m);
-void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- blk_status_t error, const char *m);
-void bch_bbio_endio(struct cache_set *c, struct bio *bio,
- blk_status_t error, const char *m);
-void bch_bbio_free(struct bio *bio, struct cache_set *c);
-struct bio *bch_bbio_alloc(struct cache_set *c);
-
-void __bch_submit_bbio(struct bio *bio, struct cache_set *c);
-void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- struct bkey *k, unsigned int ptr);
-
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b);
void bch_rescale_priorities(struct cache_set *c, int sectors);
@@ -980,14 +801,10 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
unsigned int sectors, unsigned int write_point,
unsigned int write_prio, bool wait);
-bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
-int bch_prio_write(struct cache *ca, bool wait);
-void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
-
extern struct workqueue_struct *bcache_wq;
extern struct workqueue_struct *bch_journal_wq;
extern struct mutex bch_register_lock;
@@ -999,26 +816,6 @@ extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;
-void bch_cached_dev_release(struct kobject *kobj);
-void bch_flash_dev_release(struct kobject *kobj);
-void bch_cache_set_release(struct kobject *kobj);
-void bch_cache_release(struct kobject *kobj);
-
-int bch_uuid_write(struct cache_set *c);
-void bcache_write_super(struct cache_set *c);
-
-int bch_flash_dev_create(struct cache_set *c, uint64_t size);
-
-int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- uint8_t *set_uuid);
-void bch_cached_dev_detach(struct cached_dev *dc);
-int bch_cached_dev_run(struct cached_dev *dc);
-void bcache_device_stop(struct bcache_device *d);
-
-void bch_cache_set_unregister(struct cache_set *c);
-void bch_cache_set_stop(struct cache_set *c);
-
-struct cache_set *bch_cache_set_alloc(struct cache_sb *sb);
void bch_btree_cache_free(struct cache_set *c);
int bch_btree_cache_alloc(struct cache_set *c);
void bch_moving_init_cache_set(struct cache_set *c);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index b1b91f869641..a08d8d6db2de 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -25,6 +25,7 @@
#include "btree.h"
#include "debug.h"
#include "extents.h"
+#include "io.h"
#include <linux/slab.h>
#include <linux/bitops.h>
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 336f43910383..efb7fb837d7e 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -10,6 +10,7 @@
#include "btree.h"
#include "debug.h"
#include "extents.h"
+#include "io.h"
#include <linux/console.h>
#include <linux/debugfs.h>
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 4d93f07f63e5..8ba675b8a2da 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -7,10 +7,178 @@
*/
#include "bcache.h"
+#include "backingdev.h"
#include "bset.h"
#include "debug.h"
#include <linux/blkdev.h>
+#include <linux/random.h>
+
+#include <trace/events/bcache.h>
+
+/*
+ * Congested? Return 0 (not congested) or the limit (in sectors)
+ * beyond which we should bypass the cache due to congestion.
+ */
+unsigned int bch_get_congested(const struct cache_set *c)
+{
+ int i;
+
+ if (!c->congested_read_threshold_us &&
+ !c->congested_write_threshold_us)
+ return 0;
+
+ i = (local_clock_us() - c->congested_last_us) / 1024;
+ if (i < 0)
+ return 0;
+
+ i += atomic_read(&c->congested);
+ if (i >= 0)
+ return 0;
+
+ i += CONGESTED_MAX;
+
+ if (i > 0)
+ i = fract_exp_two(i, 6);
+
+ i -= hweight32(get_random_u32());
+
+ return i > 0 ? i : 1;
+}
+
+static void add_sequential(struct task_struct *t)
+{
+ ewma_add(t->sequential_io_avg,
+ t->sequential_io, 8, 0);
+
+ t->sequential_io = 0;
+}
+
+static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
+{
+ return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
+}
+
+bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio)
+{
+ unsigned int mode = cache_mode(dc);
+ unsigned int sectors, congested, dirty_percentage, block_size;
+ struct task_struct *task = current;
+ struct io *i;
+
+ if (dc->disk.c) {
+ dirty_percentage = dc->disk.c->gc_stats.in_use;
+ block_size = dc->disk.c->sb.block_size;
+ } else {
+ /* XXX bcache2: */
+ dirty_percentage = 0;
+ block_size = 0;
+ //block_size = dc->disk.c2->sb.block_size;
+ }
+
+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
+ dirty_percentage > CUTOFF_CACHE_ADD ||
+ (bio_op(bio) == REQ_OP_DISCARD))
+ goto skip;
+
+ if (mode == CACHE_MODE_NONE ||
+ (mode == CACHE_MODE_WRITEAROUND &&
+ op_is_write(bio_op(bio))))
+ goto skip;
+
+ /*
+ * If the bio is for read-ahead or background IO, whether to bypass it
+ * depends on the following cases:
+ * - If the IO is for metadata, always cache it and do not bypass.
+ * - If the IO is not metadata, check dc->cache_readahead_policy:
+ *	BCH_CACHE_READA_ALL: cache it and do not bypass
+ *	BCH_CACHE_READA_META_ONLY: do not cache it and bypass
+ * That is, read-ahead requests for metadata always get cached
+ * (e.g. for gfs2 or xfs).
+ */
+ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
+ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
+ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
+ goto skip;
+ }
+
+ if (bio->bi_iter.bi_sector & (block_size - 1) ||
+ bio_sectors(bio) & (block_size - 1)) {
+ pr_debug("skipping unaligned io");
+ goto skip;
+ }
+
+ if (bypass_torture_test(dc)) {
+ if ((get_random_int() & 3) == 3)
+ goto skip;
+ else
+ goto rescale;
+ }
+
+ if (dc->disk.c) {
+ congested = bch_get_congested(dc->disk.c);
+ } else {
+ /* XXX bcache2: */
+ congested = 0;
+ }
+
+ if (!congested && !dc->sequential_cutoff)
+ goto rescale;
+
+ spin_lock(&dc->io_lock);
+
+ hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
+ if (i->last == bio->bi_iter.bi_sector &&
+ time_before(jiffies, i->jiffies))
+ goto found;
+
+ i = list_first_entry(&dc->io_lru, struct io, lru);
+
+ add_sequential(task);
+ i->sequential = 0;
+found:
+ if (i->sequential + bio->bi_iter.bi_size > i->sequential)
+ i->sequential += bio->bi_iter.bi_size;
+
+ i->last = bio_end_sector(bio);
+ i->jiffies = jiffies + msecs_to_jiffies(5000);
+ task->sequential_io = i->sequential;
+
+ hlist_del(&i->hash);
+ hlist_add_head(&i->hash, iohash(dc, i->last));
+ list_move_tail(&i->lru, &dc->io_lru);
+
+ spin_unlock(&dc->io_lock);
+
+ sectors = max(task->sequential_io,
+ task->sequential_io_avg) >> 9;
+
+ if (dc->sequential_cutoff &&
+ sectors >= dc->sequential_cutoff >> 9) {
+ trace_bcache_bypass_sequential(bio);
+ goto skip;
+ }
+
+ if (congested && sectors >= congested) {
+ trace_bcache_bypass_congested(bio);
+ goto skip;
+ }
+
+rescale:
+ if (dc->disk.c) {
+ bch_rescale_priorities(dc->disk.c, bio_sectors(bio));
+ } else {
+ /* bcache2: */
+ }
+ return false;
+skip:
+ if (dc->disk.c) {
+ bch_mark_sectors_bypassed(dc->disk.c, dc, bio_sectors(bio));
+ } else {
+ /* bcache2: */
+ }
+ return true;
+}
/* Bios with headers */
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
new file mode 100644
index 000000000000..59bc40c8b107
--- /dev/null
+++ b/drivers/md/bcache/io.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHE_IO_H_
+#define _BCACHE_IO_H_
+
+struct cached_dev;
+struct cache;
+struct cache_set;
+
+unsigned int bch_get_congested(const struct cache_set *c);
+bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio);
+
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
+void bch_count_io_errors(struct cache *ca, blk_status_t error,
+ int is_read, const char *m);
+void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
+ blk_status_t error, const char *m);
+void bch_bbio_endio(struct cache_set *c, struct bio *bio,
+ blk_status_t error, const char *m);
+void bch_bbio_free(struct bio *bio, struct cache_set *c);
+struct bio *bch_bbio_alloc(struct cache_set *c);
+
+void __bch_submit_bbio(struct bio *bio, struct cache_set *c);
+void bch_submit_bbio(struct bio *bio, struct cache_set *c,
+ struct bkey *k, unsigned int ptr);
+
+#endif /* _BCACHE_IO_H_ */
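With check_should_bypass() renamed to bch_check_should_bypass() and exported here, the request path can reach it through io.h; roughly (a hedged sketch of a caller, not taken from this patch):

	#include "backingdev.h"
	#include "io.h"

	static bool example_should_bypass(struct cached_dev *dc, struct bio *bio)
	{
		/* true means skip the cache and route the bio to the backing device */
		return bch_check_should_bypass(dc, bio);
	}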
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 7891fb512736..69e0f128a4f4 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -9,6 +9,7 @@
#include "btree.h"
#include "debug.h"
#include "request.h"
+#include "io.h"
#include <trace/events/bcache.h>
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 8b85ad8b3f45..755bc6a448cb 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -10,28 +10,21 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
+#include "io.h"
#include "request.h"
+#include "request2.h"
#include "writeback.h"
#include <linux/module.h>
#include <linux/hash.h>
-#include <linux/random.h>
#include <linux/backing-dev.h>
#include <trace/events/bcache.h>
-#define CUTOFF_CACHE_ADD 95
-#define CUTOFF_CACHE_READA 90
-
struct kmem_cache *bch_search_cache;
static void bch_data_insert_start(struct closure *cl);
-static unsigned int cache_mode(struct cached_dev *dc)
-{
- return BDEV_CACHE_MODE(&dc->sb);
-}
-
static bool verify(struct cached_dev *dc)
{
return dc->verify;
@@ -317,147 +310,6 @@ void bch_data_insert(struct closure *cl)
bch_data_insert_start(cl);
}
-/*
- * Congested? Return 0 (not congested) or the limit (in sectors)
- * beyond which we should bypass the cache due to congestion.
- */
-unsigned int bch_get_congested(const struct cache_set *c)
-{
- int i;
-
- if (!c->congested_read_threshold_us &&
- !c->congested_write_threshold_us)
- return 0;
-
- i = (local_clock_us() - c->congested_last_us) / 1024;
- if (i < 0)
- return 0;
-
- i += atomic_read(&c->congested);
- if (i >= 0)
- return 0;
-
- i += CONGESTED_MAX;
-
- if (i > 0)
- i = fract_exp_two(i, 6);
-
- i -= hweight32(get_random_u32());
-
- return i > 0 ? i : 1;
-}
-
-static void add_sequential(struct task_struct *t)
-{
- ewma_add(t->sequential_io_avg,
- t->sequential_io, 8, 0);
-
- t->sequential_io = 0;
-}
-
-static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
-{
- return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
-}
-
-static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
-{
- struct cache_set *c = dc->disk.c;
- unsigned int mode = cache_mode(dc);
- unsigned int sectors, congested;
- struct task_struct *task = current;
- struct io *i;
-
- if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
- (bio_op(bio) == REQ_OP_DISCARD))
- goto skip;
-
- if (mode == CACHE_MODE_NONE ||
- (mode == CACHE_MODE_WRITEAROUND &&
- op_is_write(bio_op(bio))))
- goto skip;
-
- /*
- * If the bio is for read-ahead or background IO, bypass it or
- * not depends on the following situations,
- * - If the IO is for meta data, always cache it and no bypass
- * - If the IO is not meta data, check dc->cache_reada_policy,
- * BCH_CACHE_READA_ALL: cache it and not bypass
- * BCH_CACHE_READA_META_ONLY: not cache it and bypass
- * That is, read-ahead request for metadata always get cached
- * (eg, for gfs2 or xfs).
- */
- if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
- if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
- (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
- goto skip;
- }
-
- if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
- bio_sectors(bio) & (c->sb.block_size - 1)) {
- pr_debug("skipping unaligned io");
- goto skip;
- }
-
- if (bypass_torture_test(dc)) {
- if ((get_random_int() & 3) == 3)
- goto skip;
- else
- goto rescale;
- }
-
- congested = bch_get_congested(c);
- if (!congested && !dc->sequential_cutoff)
- goto rescale;
-
- spin_lock(&dc->io_lock);
-
- hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
- if (i->last == bio->bi_iter.bi_sector &&
- time_before(jiffies, i->jiffies))
- goto found;
-
- i = list_first_entry(&dc->io_lru, struct io, lru);
-
- add_sequential(task);
- i->sequential = 0;
-found:
- if (i->sequential + bio->bi_iter.bi_size > i->sequential)
- i->sequential += bio->bi_iter.bi_size;
-
- i->last = bio_end_sector(bio);
- i->jiffies = jiffies + msecs_to_jiffies(5000);
- task->sequential_io = i->sequential;
-
- hlist_del(&i->hash);
- hlist_add_head(&i->hash, iohash(dc, i->last));
- list_move_tail(&i->lru, &dc->io_lru);
-
- spin_unlock(&dc->io_lock);
-
- sectors = max(task->sequential_io,
- task->sequential_io_avg) >> 9;
-
- if (dc->sequential_cutoff &&
- sectors >= dc->sequential_cutoff >> 9) {
- trace_bcache_bypass_sequential(bio);
- goto skip;
- }
-
- if (congested && sectors >= congested) {
- trace_bcache_bypass_congested(bio);
- goto skip;
- }
-
-rescale:
- bch_rescale_priorities(c, bio_sectors(bio));
- return false;
-skip:
- bch_mark_sectors_bypassed(c, dc, bio_sectors(bio));
- return true;
-}
-
/* Cache lookup */
struct search {
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index bb005c93dd72..01a32c5d4ec4 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -33,7 +33,6 @@ struct data_insert_op {
BKEY_PADDED(replace_key);
};
-unsigned int bch_get_congested(const struct cache_set *c);
void bch_data_insert(struct closure *cl);
void bch_cached_dev_request_init(struct cached_dev *dc);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 0da9b3af944c..f3f42862acc4 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -11,7 +11,9 @@
#include "btree.h"
#include "debug.h"
#include "extents.h"
+#include "io.h"
#include "request.h"
+#include "super.h"
#include "writeback.h"
#include <linux/blkdev.h>
diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h
new file mode 100644
index 000000000000..0ef3e83e8ff9
--- /dev/null
+++ b/drivers/md/bcache/super.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHE_SUPER_H
+#define _BCACHE_SUPER_H
+
+struct cached_dev;
+struct cache;
+struct cache_set;
+
+void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
+void bcache_write_super(struct cache_set *c);
+int bch_uuid_write(struct cache_set *c);
+int bch_prio_write(struct cache *ca, bool wait);
+
+void bcache_device_stop(struct bcache_device *d);
+int bch_cached_dev_run(struct cached_dev *dc);
+void bch_cached_dev_detach(struct cached_dev *dc);
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+ uint8_t *set_uuid);
+
+bool bch_cached_dev_error(struct cached_dev *dc);
+
+int bch_flash_dev_create(struct cache_set *c, uint64_t size);
+
+void bch_cached_dev_release(struct kobject *kobj);
+void bch_flash_dev_release(struct kobject *kobj);
+void bch_cache_set_release(struct kobject *kobj);
+void bch_cache_release(struct kobject *kobj);
+
+void bch_cache_set_unregister(struct cache_set *c);
+void bch_cache_set_stop(struct cache_set *c);
+
+struct cache_set *bch_cache_set_alloc(struct cache_sb *sb);
+
+#endif /* _BCACHE_SUPER_H */
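super.h now collects the superblock and registration entry points that callers outside super.c need; a teardown sketch (hypothetical caller; dc, c and stop_set are assumed locals, not added by this patch):

	/* stop a backing device, and optionally the cache set it was attached to */
	bcache_device_stop(&dc->disk);

	if (stop_set)
		bch_cache_set_stop(c);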
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 059ca573f2ab..34d09d94e95d 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -9,6 +9,7 @@
#include "bcache.h"
#include "sysfs.h"
#include "btree.h"
+#include "io.h"
#include "request.h"
#include "writeback.h"
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 4e8b6f42e19b..5487291035b0 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -10,6 +10,7 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
+#include "io.h"
#include "writeback.h"
#include <linux/delay.h>
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index f69bef938a0a..a83700acc4ce 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -42,44 +42,6 @@ struct bch_dirty_init_state {
struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX];
};
-static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
-{
- uint64_t i, ret = 0;
-
- for (i = 0; i < d->nr_stripes; i++)
- ret += atomic_read(d->stripe_sectors_dirty + i);
-
- return ret;
-}
-
-static inline unsigned int offset_to_stripe(struct bcache_device *d,
- uint64_t offset)
-{
- do_div(offset, d->stripe_size);
- return offset;
-}
-
-static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
- uint64_t offset,
- unsigned int nr_sectors)
-{
- unsigned int stripe = offset_to_stripe(&dc->disk, offset);
-
- while (1) {
- if (atomic_read(dc->disk.stripe_sectors_dirty + stripe))
- return true;
-
- if (nr_sectors <= dc->disk.stripe_size)
- return false;
-
- nr_sectors -= dc->disk.stripe_size;
- stripe++;
- }
-}
-
-extern unsigned int bch_cutoff_writeback;
-extern unsigned int bch_cutoff_writeback_sync;
-
static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
unsigned int cache_mode, bool would_skip)
{
@@ -106,26 +68,6 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
in_use <= bch_cutoff_writeback);
}
-static inline void bch_writeback_queue(struct cached_dev *dc)
-{
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- wake_up_process(dc->writeback_thread);
-}
-
-static inline void bch_writeback_add(struct cached_dev *dc)
-{
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
- if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
- bch_write_bdev_super(dc, NULL);
- }
-
- bch_writeback_queue(dc);
- }
-}
-
void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
uint64_t offset, int nr_sectors);
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 9a1965c6c3d0..ee308b5fc2b5 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -6,17 +6,7 @@
* Bcache on disk data structures
*/
-#include <linux/types.h>
-
-#define BITMASK(name, type, field, offset, size) \
-static inline __u64 name(const type *k) \
-{ return (k->field >> offset) & ~(~0ULL << size); } \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- k->field &= ~(~(~0ULL << size) << offset); \
- k->field |= (v & ~(~0ULL << size)) << offset; \
-}
+#include <linux/bcache_superblock.h>
/* Btree keys - all units are in sectors */
@@ -135,165 +125,6 @@ static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
/* Superblock */
-/* Version 0: Cache device
- * Version 1: Backing device
- * Version 2: Seed pointer into btree node checksum
- * Version 3: Cache device with new UUID format
- * Version 4: Backing device with data offset
- */
-#define BCACHE_SB_VERSION_CDEV 0
-#define BCACHE_SB_VERSION_BDEV 1
-#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
-#define BCACHE_SB_MAX_VERSION 4
-
-#define SB_SECTOR 8
-#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT)
-#define SB_SIZE 4096
-#define SB_LABEL_SIZE 32
-#define SB_JOURNAL_BUCKETS 256U
-/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
-#define MAX_CACHES_PER_SET 8
-
-#define BDEV_DATA_START_DEFAULT 16 /* sectors */
-
-struct cache_sb_disk {
- __le64 csum;
- __le64 offset; /* sector where this sb was written */
- __le64 version;
-
- __u8 magic[16];
-
- __u8 uuid[16];
- union {
- __u8 set_uuid[16];
- __le64 set_magic;
- };
- __u8 label[SB_LABEL_SIZE];
-
- __le64 flags;
- __le64 seq;
- __le64 pad[8];
-
- union {
- struct {
- /* Cache devices */
- __le64 nbuckets; /* device size */
-
- __le16 block_size; /* sectors */
- __le16 bucket_size; /* sectors */
-
- __le16 nr_in_set;
- __le16 nr_this_dev;
- };
- struct {
- /* Backing devices */
- __le64 data_offset;
-
- /*
- * block_size from the cache device section is still used by
- * backing devices, so don't add anything here until we fix
- * things to not need it for backing devices anymore
- */
- };
- };
-
- __le32 last_mount; /* time overflow in y2106 */
-
- __le16 first_bucket;
- union {
- __le16 njournal_buckets;
- __le16 keys;
- };
- __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
-};
-
-struct cache_sb {
- __u64 csum;
- __u64 offset; /* sector where this sb was written */
- __u64 version;
-
- __u8 magic[16];
-
- __u8 uuid[16];
- union {
- __u8 set_uuid[16];
- __u64 set_magic;
- };
- __u8 label[SB_LABEL_SIZE];
-
- __u64 flags;
- __u64 seq;
- __u64 pad[8];
-
- union {
- struct {
- /* Cache devices */
- __u64 nbuckets; /* device size */
-
- __u16 block_size; /* sectors */
- __u16 bucket_size; /* sectors */
-
- __u16 nr_in_set;
- __u16 nr_this_dev;
- };
- struct {
- /* Backing devices */
- __u64 data_offset;
-
- /*
- * block_size from the cache device section is still used by
- * backing devices, so don't add anything here until we fix
- * things to not need it for backing devices anymore
- */
- };
- };
-
- __u32 last_mount; /* time overflow in y2106 */
-
- __u16 first_bucket;
- union {
- __u16 njournal_buckets;
- __u16 keys;
- };
- __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
-};
-
-static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
-{
- return sb->version == BCACHE_SB_VERSION_BDEV
- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
-}
-
-BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
-BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
-BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
-#define CACHE_REPLACEMENT_LRU 0U
-#define CACHE_REPLACEMENT_FIFO 1U
-#define CACHE_REPLACEMENT_RANDOM 2U
-
-BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
-#define CACHE_MODE_WRITETHROUGH 0U
-#define CACHE_MODE_WRITEBACK 1U
-#define CACHE_MODE_WRITEAROUND 2U
-#define CACHE_MODE_NONE 3U
-BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
-#define BDEV_STATE_NONE 0U
-#define BDEV_STATE_CLEAN 1U
-#define BDEV_STATE_DIRTY 2U
-#define BDEV_STATE_STALE 3U
-
-/*
- * Magic numbers
- *
- * The various other data structures have their own magic numbers, which are
- * xored with the first part of the cache set's UUID
- */
-
-#define JSET_MAGIC 0x245235c1a3625032ULL
-#define PSET_MAGIC 0x6750e15f87337f91ULL
-#define BSET_MAGIC 0x90135c78b99e07f5ULL
-
static inline __u64 jset_magic(struct cache_sb *sb)
{
return sb->set_magic ^ JSET_MAGIC;
diff --git a/include/uapi/linux/bcache_superblock.h b/include/uapi/linux/bcache_superblock.h
new file mode 100644
index 000000000000..a152111ac55f
--- /dev/null
+++ b/include/uapi/linux/bcache_superblock.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_BCACHE_SUPERBLOCK_H
+#define _LINUX_BCACHE_SUPERBLOCK_H
+
+#include <linux/types.h>
+
+#define BITMASK(name, type, field, offset, size) \
+static inline __u64 name(const type *k) \
+{ return (k->field >> offset) & ~(~0ULL << size); } \
+ \
+static inline void SET_##name(type *k, __u64 v) \
+{ \
+ k->field &= ~(~(~0ULL << size) << offset); \
+ k->field |= (v & ~(~0ULL << size)) << offset; \
+}
+
+/* Version 0: Cache device
+ * Version 1: Backing device
+ * Version 2: Seed pointer into btree node checksum
+ * Version 3: Cache device with new UUID format
+ * Version 4: Backing device with data offset
+ */
+#define BCACHE_SB_VERSION_CDEV 0
+#define BCACHE_SB_VERSION_BDEV 1
+#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
+#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
+#define BCACHE_SB_MAX_VERSION 4
+
+#define SB_SECTOR 8
+#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT)
+#define SB_SIZE 4096
+#define SB_LABEL_SIZE 32
+#define SB_JOURNAL_BUCKETS 256U
+/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
+#define MAX_CACHES_PER_SET 8
+
+#define BDEV_DATA_START_DEFAULT 16 /* sectors */
+
+struct cache_sb_disk {
+ __le64 csum;
+ __le64 offset; /* sector where this sb was written */
+ __le64 version;
+
+ __u8 magic[16];
+
+ __u8 uuid[16];
+ union {
+ __u8 set_uuid[16];
+ __le64 set_magic;
+ };
+ __u8 label[SB_LABEL_SIZE];
+
+ __le64 flags;
+ __le64 seq;
+ __le64 pad[8];
+
+ union {
+ struct {
+ /* Cache devices */
+ __le64 nbuckets; /* device size */
+
+ __le16 block_size; /* sectors */
+ __le16 bucket_size; /* sectors */
+
+ __le16 nr_in_set;
+ __le16 nr_this_dev;
+ };
+ struct {
+ /* Backing devices */
+ __le64 data_offset;
+
+ /*
+ * block_size from the cache device section is still used by
+ * backing devices, so don't add anything here until we fix
+ * things to not need it for backing devices anymore
+ */
+ };
+ };
+
+ __le32 last_mount; /* time overflow in y2106 */
+
+ __le16 first_bucket;
+ union {
+ __le16 njournal_buckets;
+ __le16 keys;
+ };
+ __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
+};
+
+struct cache_sb {
+ __u64 csum;
+ __u64 offset; /* sector where this sb was written */
+ __u64 version;
+
+ __u8 magic[16];
+
+ __u8 uuid[16];
+ union {
+ __u8 set_uuid[16];
+ __u64 set_magic;
+ };
+ __u8 label[SB_LABEL_SIZE];
+
+ __u64 flags;
+ __u64 seq;
+ __u64 pad[8];
+
+ union {
+ struct {
+ /* Cache devices */
+ __u64 nbuckets; /* device size */
+
+ __u16 block_size; /* sectors */
+ __u16 bucket_size; /* sectors */
+
+ __u16 nr_in_set;
+ __u16 nr_this_dev;
+ };
+ struct {
+ /* Backing devices */
+ __u64 data_offset;
+
+ /*
+ * block_size from the cache device section is still used by
+ * backing devices, so don't add anything here until we fix
+ * things to not need it for backing devices anymore
+ */
+ };
+ };
+
+ __u32 last_mount; /* time overflow in y2106 */
+
+ __u16 first_bucket;
+ union {
+ __u16 njournal_buckets;
+ __u16 keys;
+ };
+ __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
+};
+
+static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
+{
+ return sb->version == BCACHE_SB_VERSION_BDEV
+ || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
+}
+
+BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
+BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
+BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
+#define CACHE_REPLACEMENT_LRU 0U
+#define CACHE_REPLACEMENT_FIFO 1U
+#define CACHE_REPLACEMENT_RANDOM 2U
+#define CACHE_REPLACEMENT_NR 3U
+
+BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
+#define CACHE_MODE_WRITETHROUGH 0U
+#define CACHE_MODE_WRITEBACK 1U
+#define CACHE_MODE_WRITEAROUND 2U
+#define CACHE_MODE_NONE 3U
+BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
+#define BDEV_STATE_NONE 0U
+#define BDEV_STATE_CLEAN 1U
+#define BDEV_STATE_DIRTY 2U
+#define BDEV_STATE_STALE 3U
+
+/*
+ * Magic numbers
+ *
+ * The various other data structures have their own magic numbers, which are
+ * xored with the first part of the cache set's UUID
+ */
+
+#define JSET_MAGIC 0x245235c1a3625032ULL
+#define PSET_MAGIC 0x6750e15f87337f91ULL
+#define BSET_MAGIC 0x90135c78b99e07f5ULL
+
+#endif /* _LINUX_BCACHE_SUPERBLOCK_H */
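For reference, BITMASK(name, type, field, offset, size) generates a name() getter and a SET_name() setter over the given bit range, so BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2) covers bits 61..62 of cache_sb.flags; that is what keeps helpers like cache_mode() in backingdev.h working against the relocated definitions. A small usage sketch (not part of the patch):

	struct cache_sb sb = { 0 };

	SET_BDEV_CACHE_MODE(&sb, CACHE_MODE_WRITEBACK);	/* bits 0..3 of sb.flags */
	SET_BDEV_STATE(&sb, BDEV_STATE_DIRTY);		/* bits 61..62 of sb.flags */

	/* BDEV_STATE(&sb) now reads back BDEV_STATE_DIRTY (2) */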