summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2017-03-16 14:51:41 -0800
committer Kent Overstreet <kent.overstreet@gmail.com> 2017-03-16 14:51:41 -0800
commit bb1941de5378a7b8122d3575dcbc7d0aeb6326f0 (patch)
tree fb0920334591a34e2673c3cfc63d07ac20ba0b92
parent ac296ad4dcd32a4e7618716782fc3f2b69d03a74 (diff)
update bcache sources [dev]
-rw-r--r-- .bcache_revision 2
-rw-r--r-- include/linux/bcache-ioctl.h 8
-rw-r--r-- include/linux/bcache.h 4
-rw-r--r-- libbcache/bcache.h 2
-rw-r--r-- libbcache/btree_gc.c 4
-rw-r--r-- libbcache/buckets.c 4
-rw-r--r-- libbcache/chardev.c 11
-rw-r--r-- libbcache/checksum.c 7
-rw-r--r-- libbcache/compress.c 27
-rw-r--r-- libbcache/extents.c 6
-rw-r--r-- libbcache/fs-io.c 8
-rw-r--r-- libbcache/io.c 145
-rw-r--r-- libbcache/io.h 4
-rw-r--r-- libbcache/io_types.h 36
-rw-r--r-- libbcache/notify.h 6
-rw-r--r-- libbcache/request.c 2
-rw-r--r-- libbcache/super.c 82
-rw-r--r-- libbcache/super.h 2
-rw-r--r-- libbcache/util.h 30
19 files changed, 184 insertions, 206 deletions
diff --git a/.bcache_revision b/.bcache_revision
index 434bc95..72b9b17 100644
--- a/.bcache_revision
+++ b/.bcache_revision
@@ -1 +1 @@
-BCACHE_REVISION=3ea79179e3101fb50de8730a809d00d189f05be5
+BCACHE_REVISION=84b6390084721a37c0f7a261240093ad659f9a65
diff --git a/include/linux/bcache-ioctl.h b/include/linux/bcache-ioctl.h
index 2d07666..ca76936 100644
--- a/include/linux/bcache-ioctl.h
+++ b/include/linux/bcache-ioctl.h
@@ -78,6 +78,14 @@ struct bch_ioctl_disk_set_state {
#define BCH_REWRITE_RECOMPRESS (1 << 0)
#define BCH_REWRITE_DECREASE_REPLICAS (1 << 1)
+enum bch_data_ops {
+ BCH_DATA_SCRUB,
+};
+
+struct bch_data_op {
+ __u8 type;
+};
+
struct bch_ioctl_data {
__u32 flags;
__u32 pad;
diff --git a/include/linux/bcache.h b/include/linux/bcache.h
index f4c2f27..c221747 100644
--- a/include/linux/bcache.h
+++ b/include/linux/bcache.h
@@ -886,6 +886,10 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
+struct bch_sb_field_replication {
+ struct bch_sb_field field;
+};
+
/*
* @offset - sector where this sb was written
* @version - on disk format version
diff --git a/libbcache/bcache.h b/libbcache/bcache.h
index 80d789a..1d0e998 100644
--- a/libbcache/bcache.h
+++ b/libbcache/bcache.h
@@ -716,8 +716,6 @@ struct bch_fs {
void *zlib_workspace;
struct mutex zlib_workspace_lock;
mempool_t compression_bounce[2];
- struct bio_decompress_worker __percpu
- *bio_decompress_worker;
struct crypto_blkcipher *chacha20;
struct crypto_shash *poly1305;
diff --git a/libbcache/btree_gc.c b/libbcache/btree_gc.c
index 9fa4a2a..5270d44 100644
--- a/libbcache/btree_gc.c
+++ b/libbcache/btree_gc.c
@@ -933,14 +933,14 @@ int bch_initial_gc(struct bch_fs *c, struct list_head *journal)
{
enum btree_id id;
- bch_mark_metadata(c);
-
for (id = 0; id < BTREE_ID_NR; id++)
bch_initial_gc_btree(c, id);
if (journal)
bch_journal_mark(c, journal);
+ bch_mark_metadata(c);
+
/*
* Skip past versions that might have possibly been used (as nonces),
* but hadn't had their pointers written:
diff --git a/libbcache/buckets.c b/libbcache/buckets.c
index a28d493..7be943d 100644
--- a/libbcache/buckets.c
+++ b/libbcache/buckets.c
@@ -462,7 +462,7 @@ static void bch_mark_pointer(struct bch_fs *c,
* the allocator invalidating a bucket after we've already
* checked the gen
*/
- if (gen_after(old.gen, ptr->gen)) {
+ if (gen_after(new.gen, ptr->gen)) {
EBUG_ON(type != S_CACHED &&
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
return;
@@ -470,7 +470,7 @@ static void bch_mark_pointer(struct bch_fs *c,
EBUG_ON(type != S_CACHED &&
!may_make_unavailable &&
- is_available_bucket(old) &&
+ is_available_bucket(new) &&
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
if (type != S_CACHED &&
diff --git a/libbcache/chardev.c b/libbcache/chardev.c
index c764a9d..da6d827 100644
--- a/libbcache/chardev.c
+++ b/libbcache/chardev.c
@@ -201,7 +201,6 @@ static long bch_ioctl_disk_remove(struct bch_fs *c,
{
struct bch_ioctl_disk arg;
struct bch_dev *ca;
- int ret;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
return -EFAULT;
@@ -210,10 +209,7 @@ static long bch_ioctl_disk_remove(struct bch_fs *c,
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch_dev_remove(c, ca, arg.flags);
-
- percpu_ref_put(&ca->ref);
- return ret;
+ return bch_dev_remove(c, ca, arg.flags);
}
static long bch_ioctl_disk_online(struct bch_fs *c,
@@ -294,7 +290,7 @@ static long bch_ioctl_disk_evacuate(struct bch_fs *c,
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch_dev_migrate(c, ca);
+ ret = bch_dev_evacuate(c, ca);
percpu_ref_put(&ca->ref);
return ret;
@@ -384,12 +380,11 @@ void bch_chardev_exit(void)
{
if (!IS_ERR_OR_NULL(bch_chardev_class))
device_destroy(bch_chardev_class,
- MKDEV(bch_chardev_major, 0));
+ MKDEV(bch_chardev_major, 255));
if (!IS_ERR_OR_NULL(bch_chardev_class))
class_destroy(bch_chardev_class);
if (bch_chardev_major > 0)
unregister_chrdev(bch_chardev_major, "bcache");
-
}
int __init bch_chardev_init(void)
diff --git a/libbcache/checksum.c b/libbcache/checksum.c
index b3fbeb1..b96050d 100644
--- a/libbcache/checksum.c
+++ b/libbcache/checksum.c
@@ -292,9 +292,8 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type,
case BCH_CSUM_CRC64: {
u64 crc = bch_checksum_init(type);
- bio_for_each_segment(bv, bio, iter) {
+ bio_for_each_contig_segment(bv, bio, iter) {
void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
-
crc = bch_checksum_update(type,
crc, p, bv.bv_len);
kunmap_atomic(p);
@@ -312,7 +311,7 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type,
gen_poly_key(c, desc, nonce);
- bio_for_each_segment(bv, bio, iter) {
+ bio_for_each_contig_segment(bv, bio, iter) {
void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
crypto_shash_update(desc, p, bv.bv_len);
@@ -342,7 +341,7 @@ void bch_encrypt_bio(struct bch_fs *c, unsigned type,
sg_init_table(sgl, ARRAY_SIZE(sgl));
- bio_for_each_segment(bv, bio, iter) {
+ bio_for_each_contig_segment(bv, bio, iter) {
if (sg == sgl + ARRAY_SIZE(sgl)) {
sg_mark_end(sg - 1);
do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
diff --git a/libbcache/compress.c b/libbcache/compress.c
index d6a345c..d9a64c3 100644
--- a/libbcache/compress.c
+++ b/libbcache/compress.c
@@ -8,6 +8,7 @@
#include <linux/zlib.h>
enum bounced {
+ BOUNCED_CONTIG,
BOUNCED_MAPPED,
BOUNCED_KMALLOCED,
BOUNCED_VMALLOCED,
@@ -54,6 +55,14 @@ static void *__bio_map_or_bounce(struct bch_fs *c,
BUG_ON(bvec_iter_sectors(start) > BCH_ENCODED_EXTENT_MAX);
+#ifndef CONFIG_HIGHMEM
+ *bounced = BOUNCED_CONTIG;
+
+ __bio_for_each_contig_segment(bv, bio, iter, start) {
+ if (bv.bv_len == start.bi_size)
+ return page_address(bv.bv_page) + bv.bv_offset;
+ }
+#endif
*bounced = BOUNCED_MAPPED;
__bio_for_each_segment(bv, bio, iter, start) {
@@ -443,7 +452,6 @@ void bch_fs_compress_exit(struct bch_fs *c)
mempool_exit(&c->lz4_workspace_pool);
mempool_exit(&c->compression_bounce[WRITE]);
mempool_exit(&c->compression_bounce[READ]);
- free_percpu(c->bio_decompress_worker);
}
#define COMPRESSION_WORKSPACE_SIZE \
@@ -453,22 +461,7 @@ void bch_fs_compress_exit(struct bch_fs *c)
int bch_fs_compress_init(struct bch_fs *c)
{
unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9);
- int ret, cpu;
-
- if (!c->bio_decompress_worker) {
- c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
- if (!c->bio_decompress_worker)
- return -ENOMEM;
-
- for_each_possible_cpu(cpu) {
- struct bio_decompress_worker *d =
- per_cpu_ptr(c->bio_decompress_worker, cpu);
-
- d->c = c;
- INIT_WORK(&d->work, bch_bio_decompress_work);
- init_llist_head(&d->bio_list);
- }
- }
+ int ret;
if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
diff --git a/libbcache/extents.c b/libbcache/extents.c
index 76b55f6..4b422fb 100644
--- a/libbcache/extents.c
+++ b/libbcache/extents.c
@@ -322,9 +322,7 @@ static bool should_drop_ptr(const struct bch_fs *c,
struct bkey_s_c_extent e,
const struct bch_extent_ptr *ptr)
{
- struct bch_dev *ca = c->devs[ptr->dev];
-
- return ptr_stale(ca, ptr);
+ return ptr->cached && ptr_stale(c->devs[ptr->dev], ptr);
}
static void bch_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
@@ -2153,7 +2151,7 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k,
extent_for_each_ptr_crc(e, ptr, crc) {
struct bch_dev *ca = c->devs[ptr->dev];
- if (ptr_stale(ca, ptr))
+ if (ptr->cached && ptr_stale(ca, ptr))
continue;
if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
diff --git a/libbcache/fs-io.c b/libbcache/fs-io.c
index 0aef014..afc8c20 100644
--- a/libbcache/fs-io.c
+++ b/libbcache/fs-io.c
@@ -1613,10 +1613,16 @@ ssize_t bch_direct_IO(struct kiocb *req, struct iov_iter *iter)
struct file *file = req->ki_filp;
struct inode *inode = file->f_inode;
struct bch_fs *c = inode->i_sb->s_fs_info;
+ struct blk_plug plug;
+ ssize_t ret;
- return ((iov_iter_rw(iter) == WRITE)
+ blk_start_plug(&plug);
+ ret = ((iov_iter_rw(iter) == WRITE)
? bch_direct_IO_write
: bch_direct_IO_read)(c, req, file, inode, iter, req->ki_pos);
+ blk_finish_plug(&plug);
+
+ return ret;
}
static ssize_t
diff --git a/libbcache/io.c b/libbcache/io.c
index dbe2671..753c8a3 100644
--- a/libbcache/io.c
+++ b/libbcache/io.c
@@ -354,8 +354,9 @@ static void bch_write_endio(struct bio *bio)
struct bch_dev *ca = wbio->ca;
if (bch_dev_nonfatal_io_err_on(bio->bi_error, ca,
- "data write"))
+ "data write")) {
set_closure_fn(cl, bch_write_io_error, index_update_wq(op));
+ }
bch_account_io_completion_time(ca, wbio->submit_time_us,
REQ_OP_WRITE);
@@ -973,8 +974,9 @@ static int bio_checksum_uncompress(struct bch_fs *c,
return ret;
}
-static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio)
+static void bch_rbio_free(struct bch_read_bio *rbio)
{
+ struct bch_fs *c = rbio->c;
struct bio *bio = &rbio->bio;
BUG_ON(rbio->ca);
@@ -988,7 +990,7 @@ static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio)
bio_put(bio);
}
-static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
+static void bch_rbio_done(struct bch_read_bio *rbio)
{
struct bio *orig = &bch_rbio_parent(rbio)->bio;
@@ -1000,7 +1002,7 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
orig->bi_error = rbio->bio.bi_error;
bio_endio(orig);
- bch_rbio_free(c, rbio);
+ bch_rbio_free(rbio);
} else {
if (rbio->promote)
kfree(rbio->promote);
@@ -1010,30 +1012,16 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
}
}
-/*
- * Decide if we want to retry the read - returns true if read is being retried,
- * false if caller should pass error on up
- */
-static void bch_read_error_maybe_retry(struct bch_fs *c,
- struct bch_read_bio *rbio,
- int error)
+static void bch_rbio_error(struct bch_read_bio *rbio, int error)
{
- unsigned long flags;
-
- if ((error == -EINTR) &&
- (rbio->flags & BCH_READ_RETRY_IF_STALE)) {
- atomic_long_inc(&c->cache_read_races);
- goto retry;
- }
+ bch_rbio_parent(rbio)->bio.bi_error = error;
+ bch_rbio_done(rbio);
+}
- if (error == -EIO) {
- /* io error - do we have another replica? */
- }
+static void bch_rbio_retry(struct bch_fs *c, struct bch_read_bio *rbio)
+{
+ unsigned long flags;
- bch_rbio_parent(rbio)->bio.bi_error = error;
- bch_rbio_done(c, rbio);
- return;
-retry:
percpu_ref_put(&rbio->ca->io_ref);
rbio->ca = NULL;
@@ -1053,13 +1041,26 @@ static void cache_promote_done(struct closure *cl)
}
/* Inner part that may run in process context */
-static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio)
+static void __bch_read_endio(struct work_struct *work)
{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
int ret;
ret = bio_checksum_uncompress(c, rbio);
if (ret) {
- bch_read_error_maybe_retry(c, rbio, ret);
+ /*
+ * Checksum error: if the bio wasn't bounced, we may have been
+ * reading into buffers owned by userspace (that userspace can
+ * scribble over) - retry the read, bouncing it this time:
+ */
+ if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
+ rbio->flags |= BCH_READ_FORCE_BOUNCE;
+ bch_rbio_retry(c, rbio);
+ } else {
+ bch_rbio_error(rbio, -EIO);
+ }
return;
}
@@ -1073,64 +1074,51 @@ static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio)
swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
rbio->promote = NULL;
- bch_rbio_done(c, rbio);
+ bch_rbio_done(rbio);
closure_init(cl, &c->cl);
closure_call(&promote->write.op.cl, bch_write, c->wq, cl);
closure_return_with_destructor(cl, cache_promote_done);
} else {
- bch_rbio_done(c, rbio);
+ bch_rbio_done(rbio);
}
}
-void bch_bio_decompress_work(struct work_struct *work)
-{
- struct bio_decompress_worker *d =
- container_of(work, struct bio_decompress_worker, work);
- struct llist_node *list, *next;
- struct bch_read_bio *rbio;
-
- while ((list = llist_del_all(&d->bio_list)))
- for (list = llist_reverse_order(list);
- list;
- list = next) {
- next = llist_next(list);
- rbio = container_of(list, struct bch_read_bio, list);
-
- __bch_read_endio(d->c, rbio);
- }
-}
-
static void bch_read_endio(struct bio *bio)
{
struct bch_read_bio *rbio =
container_of(bio, struct bch_read_bio, bio);
- struct bch_fs *c = rbio->ca->fs;
- int stale = ((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
- ptr_stale(rbio->ca, &rbio->ptr) ? -EINTR : 0;
- int error = bio->bi_error ?: stale;
+ struct bch_fs *c = rbio->c;
- bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, REQ_OP_READ);
+ if (rbio->flags & BCH_READ_ACCOUNT_TIMES)
+ bch_account_io_completion_time(rbio->ca, rbio->submit_time_us,
+ REQ_OP_READ);
- bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read");
-
- if (error) {
- bch_read_error_maybe_retry(c, rbio, error);
+ if (bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read")) {
+ /* XXX: retry IO errors when we have another replica */
+ bch_rbio_error(rbio, bio->bi_error);
return;
}
- if (rbio->crc.compression_type != BCH_COMPRESSION_NONE ||
- bch_csum_type_is_encryption(rbio->crc.csum_type)) {
- struct bio_decompress_worker *d;
+ if (rbio->ptr.cached &&
+ (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+ ptr_stale(rbio->ca, &rbio->ptr))) {
+ atomic_long_inc(&c->cache_read_races);
- preempt_disable();
- d = this_cpu_ptr(c->bio_decompress_worker);
- llist_add(&rbio->list, &d->bio_list);
- queue_work(system_highpri_wq, &d->work);
- preempt_enable();
- } else {
- __bch_read_endio(c, rbio);
+ if (rbio->flags & BCH_READ_RETRY_IF_STALE)
+ bch_rbio_retry(c, rbio);
+ else
+ bch_rbio_error(rbio, -EINTR);
+ return;
}
+
+ if (rbio->crc.compression_type ||
+ bch_csum_type_is_encryption(rbio->crc.csum_type))
+ queue_work(system_unbound_wq, &rbio->work);
+ else if (rbio->crc.csum_type)
+ queue_work(system_highpri_wq, &rbio->work);
+ else
+ __bch_read_endio(&rbio->work);
}
static bool should_promote(struct bch_fs *c,
@@ -1194,6 +1182,8 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
if (pick->crc.compression_type != BCH_COMPRESSION_NONE ||
(pick->crc.csum_type != BCH_CSUM_NONE &&
(bvec_iter_sectors(iter) != crc_uncompressed_size(NULL, &pick->crc) ||
+ (bch_csum_type_is_encryption(pick->crc.csum_type) &&
+ (flags & BCH_READ_USER_MAPPED)) ||
(flags & BCH_READ_FORCE_BOUNCE)))) {
read_full = true;
bounce = true;
@@ -1242,11 +1232,12 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
rbio->orig_bi_end_io = orig->bio.bi_end_io;
rbio->parent_iter = iter;
- rbio->inode = k.k->p.inode;
rbio->flags = flags;
rbio->bounce = bounce;
rbio->split = split;
- rbio->version = k.k->version;
+ rbio->c = c;
+ rbio->ca = pick->ca;
+ rbio->ptr = pick->ptr;
rbio->crc = pick->crc;
/*
* crc.compressed_size will be 0 if there wasn't any checksum
@@ -1255,9 +1246,10 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
* only for promoting)
*/
rbio->crc._compressed_size = bio_sectors(&rbio->bio) - 1;
- rbio->ptr = pick->ptr;
- rbio->ca = pick->ca;
+ rbio->version = k.k->version;
rbio->promote = promote_op;
+ rbio->inode = k.k->p.inode;
+ INIT_WORK(&rbio->work, __bch_read_endio);
rbio->bio.bi_bdev = pick->ca->disk_sb.bdev;
rbio->bio.bi_opf = orig->bio.bi_opf;
@@ -1395,12 +1387,11 @@ void bch_read(struct bch_fs *c, struct bch_read_bio *bio, u64 inode)
bch_increment_clock(c, bio_sectors(&bio->bio), READ);
bch_read_iter(c, bio, bio->bio.bi_iter, inode,
- BCH_READ_FORCE_BOUNCE|
BCH_READ_RETRY_IF_STALE|
BCH_READ_PROMOTE|
- BCH_READ_MAY_REUSE_BIO);
+ BCH_READ_MAY_REUSE_BIO|
+ BCH_READ_USER_MAPPED);
}
-EXPORT_SYMBOL(bch_read);
/**
* bch_read_retry - re-submit a bio originally from bch_read()
@@ -1409,19 +1400,17 @@ static void bch_read_retry(struct bch_fs *c, struct bch_read_bio *rbio)
{
struct bch_read_bio *parent = bch_rbio_parent(rbio);
struct bvec_iter iter = rbio->parent_iter;
+ unsigned flags = rbio->flags;
u64 inode = rbio->inode;
trace_bcache_read_retry(&rbio->bio);
if (rbio->split)
- bch_rbio_free(c, rbio);
+ bch_rbio_free(rbio);
else
rbio->bio.bi_end_io = rbio->orig_bi_end_io;
- bch_read_iter(c, parent, iter, inode,
- BCH_READ_FORCE_BOUNCE|
- BCH_READ_RETRY_IF_STALE|
- BCH_READ_PROMOTE);
+ bch_read_iter(c, parent, iter, inode, flags);
}
void bch_read_retry_work(struct work_struct *work)
diff --git a/libbcache/io.h b/libbcache/io.h
index 302ed2e..9239ca4 100644
--- a/libbcache/io.h
+++ b/libbcache/io.h
@@ -69,6 +69,8 @@ enum bch_read_flags {
BCH_READ_PROMOTE = 1 << 2,
BCH_READ_IS_LAST = 1 << 3,
BCH_READ_MAY_REUSE_BIO = 1 << 4,
+ BCH_READ_ACCOUNT_TIMES = 1 << 5,
+ BCH_READ_USER_MAPPED = 1 << 6,
};
void bch_read(struct bch_fs *, struct bch_read_bio *, u64);
@@ -85,6 +87,4 @@ int bch_discard(struct bch_fs *, struct bpos, struct bpos,
void bch_read_retry_work(struct work_struct *);
void bch_wake_delayed_writes(unsigned long data);
-void bch_bio_decompress_work(struct work_struct *);
-
#endif /* _BCACHE_IO_H */
diff --git a/libbcache/io_types.h b/libbcache/io_types.h
index 3d09687..ca1b019 100644
--- a/libbcache/io_types.h
+++ b/libbcache/io_types.h
@@ -29,29 +29,29 @@ struct bch_read_bio {
*/
struct bvec_iter parent_iter;
- /*
- * If we have to retry the read (IO error, checksum failure, read stale
- * data (raced with allocator), we retry the portion of the parent bio
- * that failed (i.e. this bio's portion, parent_iter).
- *
- * But we need to stash the inode somewhere:
- */
- u64 inode;
-
unsigned submit_time_us;
u16 flags;
u8 bounce:1,
split:1;
- struct bversion version;
- struct bch_extent_crc128 crc;
- struct bch_extent_ptr ptr;
+ struct bch_fs *c;
struct bch_dev *ca;
+ struct bch_extent_ptr ptr;
+ struct bch_extent_crc128 crc;
+ struct bversion version;
struct cache_promote_op *promote;
- /* bio_decompress_worker list */
- struct llist_node list;
+ /*
+ * If we have to retry the read (IO error, checksum failure, read stale
+ * data (raced with allocator), we retry the portion of the parent bio
+ * that failed (i.e. this bio's portion, parent_iter).
+ *
+ * But we need to stash the inode somewhere:
+ */
+ u64 inode;
+
+ struct work_struct work;
struct bio bio;
};
@@ -63,7 +63,7 @@ bch_rbio_parent(struct bch_read_bio *rbio)
}
struct bch_write_bio {
- struct bch_fs *c;
+ struct bch_fs *c;
struct bch_dev *ca;
union {
struct bio *orig;
@@ -142,10 +142,4 @@ struct bch_write_op {
u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2];
};
-struct bio_decompress_worker {
- struct bch_fs *c;
- struct work_struct work;
- struct llist_head bio_list;
-};
-
#endif /* _BCACHE_IO_TYPES_H */
diff --git a/libbcache/notify.h b/libbcache/notify.h
index 8823c06..2c1e367 100644
--- a/libbcache/notify.h
+++ b/libbcache/notify.h
@@ -16,9 +16,6 @@ void bch_notify_fs_stopped(struct bch_fs *);
void bch_notify_dev_read_write(struct bch_dev *);
void bch_notify_dev_read_only(struct bch_dev *);
void bch_notify_dev_added(struct bch_dev *);
-void bch_notify_dev_removing(struct bch_dev *);
-void bch_notify_dev_removed(struct bch_dev *);
-void bch_notify_dev_remove_failed(struct bch_dev *);
void bch_notify_dev_error(struct bch_dev *, bool);
#else
@@ -30,9 +27,6 @@ static inline void bch_notify_fs_stopped(struct bch_fs *c) {}
static inline void bch_notify_dev_read_write(struct bch_dev *ca) {}
static inline void bch_notify_dev_read_only(struct bch_dev *ca) {}
static inline void bch_notify_dev_added(struct bch_dev *ca) {}
-static inline void bch_notify_dev_removing(struct bch_dev *ca) {}
-static inline void bch_notify_dev_removed(struct bch_dev *ca) {}
-static inline void bch_notify_dev_remove_failed(struct bch_dev *ca) {}
static inline void bch_notify_dev_error(struct bch_dev *ca, bool b) {}
#endif
diff --git a/libbcache/request.c b/libbcache/request.c
index e41cfb4..b24770b 100644
--- a/libbcache/request.c
+++ b/libbcache/request.c
@@ -500,7 +500,7 @@ retry:
s->read_dirty_data = true;
bch_read_extent(c, &s->rbio, k, &pick,
- BCH_READ_FORCE_BOUNCE|
+ BCH_READ_ACCOUNT_TIMES|
BCH_READ_RETRY_IF_STALE|
(!s->bypass ? BCH_READ_PROMOTE : 0)|
(is_last ? BCH_READ_IS_LAST : 0));
diff --git a/libbcache/super.c b/libbcache/super.c
index 1e272af..f5f7493 100644
--- a/libbcache/super.c
+++ b/libbcache/super.c
@@ -1453,57 +1453,26 @@ int bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
return ret;
}
-#if 0
-int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca)
-{
- /* First, go RO before we try to migrate data off: */
- ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags);
- if (ret)
- return ret;
-
- bch_notify_dev_removing(ca);
-
- /* Migrate data, metadata off device: */
-
- ret = bch_move_data_off_device(ca);
- if (ret && !(flags & BCH_FORCE_IF_DATA_LOST)) {
- bch_err(c, "Remove of %s failed, unable to migrate data off",
- name);
- return ret;
- }
-
- if (ret)
- ret = bch_flag_data_bad(ca);
- if (ret) {
- bch_err(c, "Remove of %s failed, unable to migrate data off",
- name);
- return ret;
- }
-
- ret = bch_move_metadata_off_device(ca);
- if (ret)
- return ret;
-}
-#endif
-
/* Device add/removal: */
-static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
+int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
{
struct bch_sb_field_members *mi;
unsigned dev_idx = ca->dev_idx;
- int ret;
+ int ret = -EINVAL;
+
+ mutex_lock(&c->state_lock);
+
+ percpu_ref_put(&ca->ref); /* XXX */
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
bch_err(ca, "Cannot remove RW device");
- bch_notify_dev_remove_failed(ca);
- return -EINVAL;
+ goto err;
}
if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
bch_err(ca, "Cannot remove without losing data");
- bch_notify_dev_remove_failed(ca);
- return -EINVAL;
+ goto err;
}
/*
@@ -1514,20 +1483,18 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
ret = bch_flag_data_bad(ca);
if (ret) {
bch_err(ca, "Remove failed");
- return ret;
+ goto err;
}
if (ca->mi.has_data || ca->mi.has_metadata) {
- bch_err(ca, "Can't remove, still has data");
- return ret;
+ bch_err(ca, "Remove failed, still has data");
+ goto err;
}
/*
* Ok, really doing the remove:
* Drop device's prio pointer before removing it from superblock:
*/
- bch_notify_dev_removed(ca);
-
spin_lock(&c->journal.lock);
c->journal.prio_buckets[dev_idx] = 0;
spin_unlock(&c->journal.lock);
@@ -1549,19 +1516,10 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
bch_write_super(c);
mutex_unlock(&c->sb_lock);
-
+ mutex_unlock(&c->state_lock);
return 0;
-}
-
-int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
-{
- int ret;
-
- mutex_lock(&c->state_lock);
- percpu_ref_put(&ca->ref);
- ret = __bch_dev_remove(c, ca, flags);
+err:
mutex_unlock(&c->state_lock);
-
return ret;
}
@@ -1680,6 +1638,8 @@ err:
int bch_dev_online(struct bch_fs *c, const char *path)
{
struct bcache_superblock sb = { 0 };
+ struct bch_dev *ca;
+ unsigned dev_idx;
const char *err;
mutex_lock(&c->state_lock);
@@ -1688,17 +1648,27 @@ int bch_dev_online(struct bch_fs *c, const char *path)
if (err)
goto err;
+ dev_idx = sb.sb->dev_idx;
+
err = bch_dev_in_fs(c->disk_sb, sb.sb);
if (err)
goto err;
mutex_lock(&c->sb_lock);
if (__bch_dev_online(c, &sb)) {
+ err = "__bch_dev_online() error";
mutex_unlock(&c->sb_lock);
goto err;
}
mutex_unlock(&c->sb_lock);
+ ca = c->devs[dev_idx];
+ if (ca->mi.state == BCH_MEMBER_STATE_RW) {
+ err = __bch_dev_read_write(c, ca);
+ if (err)
+ goto err;
+ }
+
mutex_unlock(&c->state_lock);
return 0;
err:
@@ -1725,7 +1695,7 @@ int bch_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
return 0;
}
-int bch_dev_migrate(struct bch_fs *c, struct bch_dev *ca)
+int bch_dev_evacuate(struct bch_fs *c, struct bch_dev *ca)
{
int ret;
diff --git a/libbcache/super.h b/libbcache/super.h
index 79da390..66c3430 100644
--- a/libbcache/super.h
+++ b/libbcache/super.h
@@ -107,7 +107,7 @@ int bch_dev_remove(struct bch_fs *, struct bch_dev *, int);
int bch_dev_add(struct bch_fs *, const char *);
int bch_dev_online(struct bch_fs *, const char *);
int bch_dev_offline(struct bch_fs *, struct bch_dev *, int);
-int bch_dev_migrate(struct bch_fs *, struct bch_dev *);
+int bch_dev_evacuate(struct bch_fs *, struct bch_dev *);
void bch_fs_detach(struct bch_fs *);
diff --git a/libbcache/util.h b/libbcache/util.h
index 2b171a1..88cbe30 100644
--- a/libbcache/util.h
+++ b/libbcache/util.h
@@ -1,6 +1,7 @@
#ifndef _BCACHE_UTIL_H
#define _BCACHE_UTIL_H
+#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/errno.h>
#include <linux/blkdev.h>
@@ -722,4 +723,33 @@ static inline void memmove_u64s(void *dst, const void *src,
__memmove_u64s_up(dst, src, u64s);
}
+static inline struct bio_vec next_contig_bvec(struct bio *bio,
+ struct bvec_iter *iter)
+{
+ struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+ bio_advance_iter(bio, iter, bv.bv_len);
+#ifndef CONFIG_HIGHMEM
+ while (iter->bi_size) {
+ struct bio_vec next = bio_iter_iovec(bio, *iter);
+
+ if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len !=
+ page_address(next.bv_page) + next.bv_offset)
+ break;
+
+ bv.bv_len += next.bv_len;
+ bio_advance_iter(bio, iter, next.bv_len);
+ }
+#endif
+ return bv;
+}
+
+#define __bio_for_each_contig_segment(bv, bio, iter, start) \
+ for (iter = (start); \
+ (iter).bi_size && \
+ ((bv = next_contig_bvec((bio), &(iter))), 1);)
+
+#define bio_for_each_contig_segment(bv, bio, iter) \
+ __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
+
#endif /* _BCACHE_UTIL_H */