diff options
-rw-r--r-- | drivers/md/bcache/journal.c | 47 | ||||
-rw-r--r-- | drivers/md/bcache/journal.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/journal_types.h | 26 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 26 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 3 | ||||
-rw-r--r-- | include/uapi/linux/bcache.h | 2 |
6 files changed, 65 insertions, 41 deletions
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 0d8bf7ca7e4a..28a79a1a0e3f 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -465,13 +465,14 @@ static enum { } journal_entry_validate(struct cache *ca, const struct jset *j, u64 sector, unsigned bucket_sectors_left, unsigned sectors_read) { + struct cache_set *c = ca->set; size_t bytes = __set_bytes(j, le32_to_cpu(j->u64s)); u64 got, expect; if (bch_meta_read_fault("journal")) return JOURNAL_ENTRY_BAD; - if (le64_to_cpu(j->magic) != jset_magic(&ca->set->disk_sb)) { + if (le64_to_cpu(j->magic) != jset_magic(&c->disk_sb)) { pr_debug("bad magic while reading journal from %llu", sector); return JOURNAL_ENTRY_BAD; } @@ -485,7 +486,7 @@ static enum { return JOURNAL_ENTRY_BAD; if (cache_inconsistent_on(bytes > bucket_sectors_left << 9 || - bytes > JOURNAL_BUF_BYTES, ca, + bytes > c->journal.entry_size_max, ca, "journal entry too big (%zu bytes), sector %lluu", bytes, sector)) return JOURNAL_ENTRY_BAD; @@ -515,15 +516,17 @@ static int journal_read_bucket(struct cache *ca, struct journal_list *jlist, { struct cache_set *c = ca->set; struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio; + struct bio *bio = ja->bio; struct jset *j, *data; unsigned blocks, sectors_read, bucket_offset = 0; + unsigned max_entry_sectors = c->journal.entry_size_max >> 9; u64 sector = bucket_to_sector(ca, journal_bucket(ca->disk_sb.sb, bucket)); bool entries_found = false; int ret = 0; - data = (void *) __get_free_pages(GFP_KERNEL, JOURNAL_BUF_ORDER); + data = (void *) __get_free_pages(GFP_KERNEL, + get_order(c->journal.entry_size_max)); if (!data) { mutex_lock(&jlist->cache_set_buffer_lock); data = c->journal.buf[0].data; @@ -535,7 +538,7 @@ static int journal_read_bucket(struct cache *ca, struct journal_list *jlist, reread: sectors_read = min_t(unsigned, ca->mi.bucket_size - bucket_offset, - JOURNAL_BUF_SECTORS); + max_entry_sectors); bio_reset(bio); bio->bi_bdev = ca->disk_sb.bdev; @@ -616,7 +619,8 @@ err: if (data == c->journal.buf[0].data) mutex_unlock(&jlist->cache_set_buffer_lock); else - free_pages((unsigned long) data, JOURNAL_BUF_ORDER); + free_pages((unsigned long) data, + get_order(c->journal.entry_size_max)); return ret; } @@ -1137,7 +1141,7 @@ static int journal_entry_sectors(struct journal *j) struct cache_set *c = container_of(j, struct cache_set, journal); struct cache *ca; struct bkey_s_extent e = bkey_i_to_s_extent(&j->key); - unsigned sectors_available = JOURNAL_BUF_SECTORS; + unsigned sectors_available = j->entry_size_max >> 9; unsigned i, nr_online = 0, nr_devs = 0; lockdep_assert_held(&j->lock); @@ -1219,6 +1223,8 @@ static int journal_entry_open(struct journal *j) u64s -= journal_entry_u64s_reserve(buf); u64s = max_t(ssize_t, 0L, u64s); + BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL); + if (u64s > le32_to_cpu(buf->data->u64s)) { union journal_res_state old, new; u64 v = atomic64_read(&j->reservations.counter); @@ -1829,7 +1835,7 @@ static void journal_write_compact(struct jset *jset) static void journal_write_endio(struct bio *bio) { - struct cache *ca = container_of(bio, struct cache, journal.bio); + struct cache *ca = bio->bi_private; struct journal *j = &ca->set->journal; if (cache_fatal_io_err_on(bio->bi_error, ca, "journal write") || @@ -1940,16 +1946,15 @@ static void journal_write(struct closure *cl) continue; } - bio = &ca->journal.bio; - atomic64_add(sectors, &ca->meta_sectors_written); + bio = ca->journal.bio; bio_reset(bio); bio->bi_iter.bi_sector = ptr->offset; bio->bi_bdev = ca->disk_sb.bdev; bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; - bio->bi_private = w; + bio->bi_private = ca; bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); bch_bio_map(bio, w->data); @@ -1968,11 +1973,11 @@ static void journal_write(struct closure *cl) !bch_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) { percpu_ref_get(&ca->ref); - bio = &ca->journal.bio; + bio = ca->journal.bio; bio_reset(bio); bio->bi_bdev = ca->disk_sb.bdev; bio->bi_end_io = journal_write_endio; - bio->bi_private = w; + bio->bi_private = ca; bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); closure_bio_submit_punt(bio, cl, c); } @@ -2243,14 +2248,17 @@ int bch_journal_flush(struct journal *j) void bch_journal_free(struct journal *j) { - free_pages((unsigned long) j->buf[1].data, JOURNAL_BUF_ORDER); - free_pages((unsigned long) j->buf[0].data, JOURNAL_BUF_ORDER); + unsigned order = get_order(j->entry_size_max); + + free_pages((unsigned long) j->buf[1].data, order); + free_pages((unsigned long) j->buf[0].data, order); free_fifo(&j->pin); } -int bch_journal_alloc(struct journal *j) +int bch_journal_alloc(struct journal *j, unsigned entry_size_max) { static struct lock_class_key res_key; + unsigned order = get_order(entry_size_max); spin_lock_init(&j->lock); spin_lock_init(&j->pin_lock); @@ -2263,6 +2271,7 @@ int bch_journal_alloc(struct journal *j) lockdep_init_map(&j->res_map, "journal res", &res_key, 0); + j->entry_size_max = entry_size_max; j->write_delay_ms = 100; j->reclaim_delay_ms = 100; @@ -2273,10 +2282,8 @@ int bch_journal_alloc(struct journal *j) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || - !(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, - JOURNAL_BUF_ORDER)) || - !(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, - JOURNAL_BUF_ORDER))) + !(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, order)) || + !(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, order))) return -ENOMEM; return 0; diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index b0f1ce7b9589..940e697d5d47 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h @@ -350,7 +350,7 @@ static inline void bch_journal_set_replay_done(struct journal *j) } void bch_journal_free(struct journal *); -int bch_journal_alloc(struct journal *); +int bch_journal_alloc(struct journal *, unsigned); ssize_t bch_journal_print_debug(struct journal *, char *); diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h index 3200ba7462d2..087f41bff385 100644 --- a/drivers/md/bcache/journal_types.h +++ b/drivers/md/bcache/journal_types.h @@ -7,11 +7,6 @@ struct journal_res; -/* size of allocated buffer, max journal entry size: */ -#define JOURNAL_BUF_BYTES (256 << 10) -#define JOURNAL_BUF_SECTORS (JOURNAL_BUF_BYTES >> 9) -#define JOURNAL_BUF_ORDER ilog2(JOURNAL_BUF_BYTES >> PAGE_SHIFT) - /* * We put two of these in struct journal; we used them for writes to the * journal that are being staged or in flight. @@ -70,8 +65,8 @@ struct journal_seq_blacklist { struct journal_res { bool ref; u8 idx; - u16 offset; u16 u64s; + u32 offset; u64 seq; }; @@ -85,19 +80,24 @@ union journal_res_state { }; struct { - u64 cur_entry_offset:16, + u64 cur_entry_offset:20, idx:1, prev_buf_unwritten:1, - buf0_count:23, - buf1_count:23; + buf0_count:21, + buf1_count:21; }; }; +/* 4 mb, in bytes: */ +#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) + /* * We stash some journal state as sentinal values in cur_entry_offset: */ -#define JOURNAL_ENTRY_CLOSED_VAL (U16_MAX - 1) -#define JOURNAL_ENTRY_ERROR_VAL (U16_MAX) +#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) + +#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) +#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) /* * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP, @@ -120,6 +120,7 @@ struct journal { unsigned cur_entry_u64s; unsigned prev_buf_sectors; unsigned cur_buf_sectors; + unsigned entry_size_max; /* bytes */ /* * Two journal entries -- one is currently open for new entries, the @@ -226,8 +227,7 @@ struct journal_device { unsigned last_idx; /* Bio for journal reads/writes to this device */ - struct bio bio; - struct bio_vec bv[JOURNAL_BUF_BYTES / PAGE_SIZE]; + struct bio *bio; /* for bch_journal_read_device */ struct closure read; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 85d8f60a3c07..1aba0a698755 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -260,6 +260,13 @@ static const char *validate_cache_super(struct bcache_superblock *disk_sb) if (CACHE_SET_GC_RESERVE(sb) < 5) return "gc reserve percentage too small"; + if (!CACHE_SET_JOURNAL_ENTRY_SIZE(sb)) + SET_CACHE_SET_JOURNAL_ENTRY_SIZE(sb, 9); + + /* 4 mb max: */ + if (512U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb) > JOURNAL_ENTRY_SIZE_MAX) + return "max journal entry size too big"; + if (le16_to_cpu(sb->u64s) < bch_journal_buckets_offset(sb)) return "Invalid superblock: member info area missing"; @@ -1053,7 +1060,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, struct cache_set_opts opts) { struct cache_set *c; - unsigned iter_size; + unsigned iter_size, journal_entry_bytes; c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); if (!c) @@ -1143,6 +1150,8 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, iter_size = (btree_blocks(c) + 1) * 2 * sizeof(struct btree_node_iter_set); + journal_entry_bytes = 512U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb); + if (!(c->wq = alloc_workqueue("bcache", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || !(c->copygc_wq = alloc_workqueue("bcache_copygc", @@ -1170,7 +1179,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, bdi_setup_and_register(&c->bdi, "bcache") || bch_io_clock_init(&c->io_clock[READ]) || bch_io_clock_init(&c->io_clock[WRITE]) || - bch_journal_alloc(&c->journal) || + bch_journal_alloc(&c->journal, journal_entry_bytes) || bch_btree_cache_alloc(c) || bch_compress_init(c)) goto err; @@ -1698,6 +1707,7 @@ static void bch_cache_free_work(struct work_struct *work) free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); kfree(ca->prio_buckets); kfree(ca->bio_prio); + kfree(ca->journal.bio); vfree(ca->buckets); vfree(ca->bucket_gens); free_heap(&ca->heap); @@ -1899,7 +1909,7 @@ static const char *cache_alloc(struct bcache_superblock *sb, { size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve; size_t heap_size; - unsigned i; + unsigned i, journal_entry_pages; const char *err = "cannot allocate memory"; struct cache *ca; @@ -1928,9 +1938,6 @@ static const char *cache_alloc(struct bcache_superblock *sb, INIT_WORK(&ca->free_work, bch_cache_free_work); INIT_WORK(&ca->remove_work, bch_cache_remove_work); - bio_init(&ca->journal.bio); - ca->journal.bio.bi_max_vecs = 8; - ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; spin_lock_init(&ca->freelist_lock); spin_lock_init(&ca->prio_buckets_lock); mutex_init(&ca->heap_lock); @@ -1961,6 +1968,10 @@ static const char *cache_alloc(struct bcache_superblock *sb, free_inc_reserve = movinggc_reserve / 2; heap_size = movinggc_reserve * 8; + journal_entry_pages = + DIV_ROUND_UP(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(ca->disk_sb.sb), + PAGE_SECTORS); + if (!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_BTREE], BTREE_NODE_RESERVE, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], @@ -1978,7 +1989,8 @@ static const char *cache_alloc(struct bcache_superblock *sb, !(ca->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache)) || !(ca->journal.bucket_seq = kcalloc(bch_nr_journal_buckets(ca->disk_sb.sb), sizeof(u64), GFP_KERNEL)) || - !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) || + !(ca->journal.bio = bio_kmalloc(GFP_KERNEL, journal_entry_pages)) || + !(ca->bio_prio = bio_kmalloc(GFP_KERNEL, bucket_pages(ca))) || bioset_init(&ca->replica_set, 4, offsetof(struct bch_write_bio, bio)) || !(ca->sectors_written = alloc_percpu(*ca->sectors_written))) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 04398c05373b..40d006b477ad 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -130,6 +130,7 @@ read_attribute(partial_stripes_expensive); rw_attribute(journal_write_delay_ms); rw_attribute(journal_reclaim_delay_ms); +read_attribute(journal_entry_size_max); rw_attribute(discard); rw_attribute(running); @@ -650,6 +651,7 @@ SHOW(bch_cache_set) sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms); sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); + sysfs_hprint(journal_entry_size_max, c->journal.entry_size_max); sysfs_hprint(block_size, block_bytes(c)); sysfs_print(block_size_bytes, block_bytes(c)); @@ -877,6 +879,7 @@ static struct attribute *bch_cache_set_files[] = { &sysfs_stop, &sysfs_journal_write_delay_ms, &sysfs_journal_reclaim_delay_ms, + &sysfs_journal_entry_size_max, &sysfs_blockdev_volume_create, &sysfs_add_device, diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 44de20990b56..c794ce4879bd 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -809,6 +809,8 @@ LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6); */ LE64_BITMASK(CACHE_SET_CLEAN, struct cache_sb, flags2, 6, 7); +LE64_BITMASK(CACHE_SET_JOURNAL_ENTRY_SIZE, struct cache_sb, flags2, 7, 15); + /* options: */ /** |