summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2016-12-11 15:27:30 -0900
committer Kent Overstreet <kent.overstreet@gmail.com> 2017-01-18 21:41:26 -0900
commit 6584f775bbc01fd14455272afafcd8c45e5b9801 (patch)
tree c8daa40301ca81a7378754ac682e132d5ab1d355
parent 31bc018cf74f5bed01fda161d2df881c93213404 (diff)
bcache: add field to superblock for maximum journal entry size
-rw-r--r-- drivers/md/bcache/journal.c 47
-rw-r--r-- drivers/md/bcache/journal.h 2
-rw-r--r-- drivers/md/bcache/journal_types.h 26
-rw-r--r-- drivers/md/bcache/super.c 26
-rw-r--r-- drivers/md/bcache/sysfs.c 3
-rw-r--r-- include/uapi/linux/bcache.h 2
6 files changed, 65 insertions, 41 deletions
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 0d8bf7ca7e4a..28a79a1a0e3f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -465,13 +465,14 @@ static enum {
} journal_entry_validate(struct cache *ca, const struct jset *j, u64 sector,
unsigned bucket_sectors_left, unsigned sectors_read)
{
+ struct cache_set *c = ca->set;
size_t bytes = __set_bytes(j, le32_to_cpu(j->u64s));
u64 got, expect;
if (bch_meta_read_fault("journal"))
return JOURNAL_ENTRY_BAD;
- if (le64_to_cpu(j->magic) != jset_magic(&ca->set->disk_sb)) {
+ if (le64_to_cpu(j->magic) != jset_magic(&c->disk_sb)) {
pr_debug("bad magic while reading journal from %llu", sector);
return JOURNAL_ENTRY_BAD;
}
@@ -485,7 +486,7 @@ static enum {
return JOURNAL_ENTRY_BAD;
if (cache_inconsistent_on(bytes > bucket_sectors_left << 9 ||
- bytes > JOURNAL_BUF_BYTES, ca,
+ bytes > c->journal.entry_size_max, ca,
"journal entry too big (%zu bytes), sector %lluu",
bytes, sector))
return JOURNAL_ENTRY_BAD;
@@ -515,15 +516,17 @@ static int journal_read_bucket(struct cache *ca, struct journal_list *jlist,
{
struct cache_set *c = ca->set;
struct journal_device *ja = &ca->journal;
- struct bio *bio = &ja->bio;
+ struct bio *bio = ja->bio;
struct jset *j, *data;
unsigned blocks, sectors_read, bucket_offset = 0;
+ unsigned max_entry_sectors = c->journal.entry_size_max >> 9;
u64 sector = bucket_to_sector(ca,
journal_bucket(ca->disk_sb.sb, bucket));
bool entries_found = false;
int ret = 0;
- data = (void *) __get_free_pages(GFP_KERNEL, JOURNAL_BUF_ORDER);
+ data = (void *) __get_free_pages(GFP_KERNEL,
+ get_order(c->journal.entry_size_max));
if (!data) {
mutex_lock(&jlist->cache_set_buffer_lock);
data = c->journal.buf[0].data;
@@ -535,7 +538,7 @@ static int journal_read_bucket(struct cache *ca, struct journal_list *jlist,
reread:
sectors_read = min_t(unsigned,
ca->mi.bucket_size - bucket_offset,
- JOURNAL_BUF_SECTORS);
+ max_entry_sectors);
bio_reset(bio);
bio->bi_bdev = ca->disk_sb.bdev;
@@ -616,7 +619,8 @@ err:
if (data == c->journal.buf[0].data)
mutex_unlock(&jlist->cache_set_buffer_lock);
else
- free_pages((unsigned long) data, JOURNAL_BUF_ORDER);
+ free_pages((unsigned long) data,
+ get_order(c->journal.entry_size_max));
return ret;
}
@@ -1137,7 +1141,7 @@ static int journal_entry_sectors(struct journal *j)
struct cache_set *c = container_of(j, struct cache_set, journal);
struct cache *ca;
struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
- unsigned sectors_available = JOURNAL_BUF_SECTORS;
+ unsigned sectors_available = j->entry_size_max >> 9;
unsigned i, nr_online = 0, nr_devs = 0;
lockdep_assert_held(&j->lock);
@@ -1219,6 +1223,8 @@ static int journal_entry_open(struct journal *j)
u64s -= journal_entry_u64s_reserve(buf);
u64s = max_t(ssize_t, 0L, u64s);
+ BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
+
if (u64s > le32_to_cpu(buf->data->u64s)) {
union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter);
@@ -1829,7 +1835,7 @@ static void journal_write_compact(struct jset *jset)
static void journal_write_endio(struct bio *bio)
{
- struct cache *ca = container_of(bio, struct cache, journal.bio);
+ struct cache *ca = bio->bi_private;
struct journal *j = &ca->set->journal;
if (cache_fatal_io_err_on(bio->bi_error, ca, "journal write") ||
@@ -1940,16 +1946,15 @@ static void journal_write(struct closure *cl)
continue;
}
- bio = &ca->journal.bio;
-
atomic64_add(sectors, &ca->meta_sectors_written);
+ bio = ca->journal.bio;
bio_reset(bio);
bio->bi_iter.bi_sector = ptr->offset;
bio->bi_bdev = ca->disk_sb.bdev;
bio->bi_iter.bi_size = sectors << 9;
bio->bi_end_io = journal_write_endio;
- bio->bi_private = w;
+ bio->bi_private = ca;
bio_set_op_attrs(bio, REQ_OP_WRITE,
REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
bch_bio_map(bio, w->data);
@@ -1968,11 +1973,11 @@ static void journal_write(struct closure *cl)
!bch_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) {
percpu_ref_get(&ca->ref);
- bio = &ca->journal.bio;
+ bio = ca->journal.bio;
bio_reset(bio);
bio->bi_bdev = ca->disk_sb.bdev;
bio->bi_end_io = journal_write_endio;
- bio->bi_private = w;
+ bio->bi_private = ca;
bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
closure_bio_submit_punt(bio, cl, c);
}
@@ -2243,14 +2248,17 @@ int bch_journal_flush(struct journal *j)
void bch_journal_free(struct journal *j)
{
- free_pages((unsigned long) j->buf[1].data, JOURNAL_BUF_ORDER);
- free_pages((unsigned long) j->buf[0].data, JOURNAL_BUF_ORDER);
+ unsigned order = get_order(j->entry_size_max);
+
+ free_pages((unsigned long) j->buf[1].data, order);
+ free_pages((unsigned long) j->buf[0].data, order);
free_fifo(&j->pin);
}
-int bch_journal_alloc(struct journal *j)
+int bch_journal_alloc(struct journal *j, unsigned entry_size_max)
{
static struct lock_class_key res_key;
+ unsigned order = get_order(entry_size_max);
spin_lock_init(&j->lock);
spin_lock_init(&j->pin_lock);
@@ -2263,6 +2271,7 @@ int bch_journal_alloc(struct journal *j)
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
+ j->entry_size_max = entry_size_max;
j->write_delay_ms = 100;
j->reclaim_delay_ms = 100;
@@ -2273,10 +2282,8 @@ int bch_journal_alloc(struct journal *j)
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
- !(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL,
- JOURNAL_BUF_ORDER)) ||
- !(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL,
- JOURNAL_BUF_ORDER)))
+ !(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, order)) ||
+ !(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, order)))
return -ENOMEM;
return 0;
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index b0f1ce7b9589..940e697d5d47 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -350,7 +350,7 @@ static inline void bch_journal_set_replay_done(struct journal *j)
}
void bch_journal_free(struct journal *);
-int bch_journal_alloc(struct journal *);
+int bch_journal_alloc(struct journal *, unsigned);
ssize_t bch_journal_print_debug(struct journal *, char *);
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
index 3200ba7462d2..087f41bff385 100644
--- a/drivers/md/bcache/journal_types.h
+++ b/drivers/md/bcache/journal_types.h
@@ -7,11 +7,6 @@
struct journal_res;
-/* size of allocated buffer, max journal entry size: */
-#define JOURNAL_BUF_BYTES (256 << 10)
-#define JOURNAL_BUF_SECTORS (JOURNAL_BUF_BYTES >> 9)
-#define JOURNAL_BUF_ORDER ilog2(JOURNAL_BUF_BYTES >> PAGE_SHIFT)
-
/*
* We put two of these in struct journal; we used them for writes to the
* journal that are being staged or in flight.
@@ -70,8 +65,8 @@ struct journal_seq_blacklist {
struct journal_res {
bool ref;
u8 idx;
- u16 offset;
u16 u64s;
+ u32 offset;
u64 seq;
};
@@ -85,19 +80,24 @@ union journal_res_state {
};
struct {
- u64 cur_entry_offset:16,
+ u64 cur_entry_offset:20,
idx:1,
prev_buf_unwritten:1,
- buf0_count:23,
- buf1_count:23;
+ buf0_count:21,
+ buf1_count:21;
};
};
+/* 4 mb, in bytes: */
+#define JOURNAL_ENTRY_SIZE_MAX (4U << 20)
+
/*
* We stash some journal state as sentinal values in cur_entry_offset:
*/
-#define JOURNAL_ENTRY_CLOSED_VAL (U16_MAX - 1)
-#define JOURNAL_ENTRY_ERROR_VAL (U16_MAX)
+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1)
+
+#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1)
+#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX)
/*
* JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
@@ -120,6 +120,7 @@ struct journal {
unsigned cur_entry_u64s;
unsigned prev_buf_sectors;
unsigned cur_buf_sectors;
+ unsigned entry_size_max; /* bytes */
/*
* Two journal entries -- one is currently open for new entries, the
@@ -226,8 +227,7 @@ struct journal_device {
unsigned last_idx;
/* Bio for journal reads/writes to this device */
- struct bio bio;
- struct bio_vec bv[JOURNAL_BUF_BYTES / PAGE_SIZE];
+ struct bio *bio;
/* for bch_journal_read_device */
struct closure read;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 85d8f60a3c07..1aba0a698755 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -260,6 +260,13 @@ static const char *validate_cache_super(struct bcache_superblock *disk_sb)
if (CACHE_SET_GC_RESERVE(sb) < 5)
return "gc reserve percentage too small";
+ if (!CACHE_SET_JOURNAL_ENTRY_SIZE(sb))
+ SET_CACHE_SET_JOURNAL_ENTRY_SIZE(sb, 9);
+
+ /* 4 mb max: */
+ if (512U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb) > JOURNAL_ENTRY_SIZE_MAX)
+ return "max journal entry size too big";
+
if (le16_to_cpu(sb->u64s) < bch_journal_buckets_offset(sb))
return "Invalid superblock: member info area missing";
@@ -1053,7 +1060,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
struct cache_set_opts opts)
{
struct cache_set *c;
- unsigned iter_size;
+ unsigned iter_size, journal_entry_bytes;
c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
if (!c)
@@ -1143,6 +1150,8 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
iter_size = (btree_blocks(c) + 1) * 2 *
sizeof(struct btree_node_iter_set);
+ journal_entry_bytes = 512U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb);
+
if (!(c->wq = alloc_workqueue("bcache",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcache_copygc",
@@ -1170,7 +1179,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
bdi_setup_and_register(&c->bdi, "bcache") ||
bch_io_clock_init(&c->io_clock[READ]) ||
bch_io_clock_init(&c->io_clock[WRITE]) ||
- bch_journal_alloc(&c->journal) ||
+ bch_journal_alloc(&c->journal, journal_entry_bytes) ||
bch_btree_cache_alloc(c) ||
bch_compress_init(c))
goto err;
@@ -1698,6 +1707,7 @@ static void bch_cache_free_work(struct work_struct *work)
free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
kfree(ca->prio_buckets);
kfree(ca->bio_prio);
+ kfree(ca->journal.bio);
vfree(ca->buckets);
vfree(ca->bucket_gens);
free_heap(&ca->heap);
@@ -1899,7 +1909,7 @@ static const char *cache_alloc(struct bcache_superblock *sb,
{
size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve;
size_t heap_size;
- unsigned i;
+ unsigned i, journal_entry_pages;
const char *err = "cannot allocate memory";
struct cache *ca;
@@ -1928,9 +1938,6 @@ static const char *cache_alloc(struct bcache_superblock *sb,
INIT_WORK(&ca->free_work, bch_cache_free_work);
INIT_WORK(&ca->remove_work, bch_cache_remove_work);
- bio_init(&ca->journal.bio);
- ca->journal.bio.bi_max_vecs = 8;
- ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
spin_lock_init(&ca->freelist_lock);
spin_lock_init(&ca->prio_buckets_lock);
mutex_init(&ca->heap_lock);
@@ -1961,6 +1968,10 @@ static const char *cache_alloc(struct bcache_superblock *sb,
free_inc_reserve = movinggc_reserve / 2;
heap_size = movinggc_reserve * 8;
+ journal_entry_pages =
+ DIV_ROUND_UP(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(ca->disk_sb.sb),
+ PAGE_SECTORS);
+
if (!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_BTREE], BTREE_NODE_RESERVE, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_MOVINGGC],
@@ -1978,7 +1989,8 @@ static const char *cache_alloc(struct bcache_superblock *sb,
!(ca->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache)) ||
!(ca->journal.bucket_seq = kcalloc(bch_nr_journal_buckets(ca->disk_sb.sb),
sizeof(u64), GFP_KERNEL)) ||
- !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) ||
+ !(ca->journal.bio = bio_kmalloc(GFP_KERNEL, journal_entry_pages)) ||
+ !(ca->bio_prio = bio_kmalloc(GFP_KERNEL, bucket_pages(ca))) ||
bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio)) ||
!(ca->sectors_written = alloc_percpu(*ca->sectors_written)))
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 04398c05373b..40d006b477ad 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -130,6 +130,7 @@ read_attribute(partial_stripes_expensive);
rw_attribute(journal_write_delay_ms);
rw_attribute(journal_reclaim_delay_ms);
+read_attribute(journal_entry_size_max);
rw_attribute(discard);
rw_attribute(running);
@@ -650,6 +651,7 @@ SHOW(bch_cache_set)
sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
+ sysfs_hprint(journal_entry_size_max, c->journal.entry_size_max);
sysfs_hprint(block_size, block_bytes(c));
sysfs_print(block_size_bytes, block_bytes(c));
@@ -877,6 +879,7 @@ static struct attribute *bch_cache_set_files[] = {
&sysfs_stop,
&sysfs_journal_write_delay_ms,
&sysfs_journal_reclaim_delay_ms,
+ &sysfs_journal_entry_size_max,
&sysfs_blockdev_volume_create,
&sysfs_add_device,
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 44de20990b56..c794ce4879bd 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -809,6 +809,8 @@ LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6);
*/
LE64_BITMASK(CACHE_SET_CLEAN, struct cache_sb, flags2, 6, 7);
+LE64_BITMASK(CACHE_SET_JOURNAL_ENTRY_SIZE, struct cache_sb, flags2, 7, 15);
+
/* options: */
/**