author     Kent Overstreet <kent.overstreet@gmail.com>    2015-06-04 22:46:12 -0700
committer  Kent Overstreet <kent.overstreet@gmail.com>    2016-10-07 12:34:24 -0800
commit     9485f4233fddb6ac6b2de028554f2c2e448b119b (patch)
tree       894247b07b04d6bf21b67f4fb79e5bad5f2f8fb7
parent     5073b4e77d2dbb8231fdf82075edf48cc64f2ffa (diff)
bcache: Plumb through journal error checks
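
Journal write errors previously only triggered bch_cache_error(); callers of
bch_journal_meta(), bch_journal_flush() and bch_journal_push_seq() had no way
to see that a journal write had failed. Track errors with a new JOURNAL_ERROR
flag, set in journal_write_endio() and reported by bch_journal_error() as
-EIO. Rename bch_journal_push_seq() to bch_journal_flush_seq_async(), and
split the flushing interface into asynchronous variants that take a closure
and synchronous variants that return an error. Plumb the resulting errors
through the callers in the allocator, btree, fs, io, super and sysfs code.

A sketch of the two calling conventions after this change (illustrative
only, using names from the new journal.h interface):

	/* Synchronous: the journal error is returned directly. */
	ret = bch_journal_meta(&c->journal);
	if (ret)
		return ret;

	/* Asynchronous: wait on the closure, then check for errors. */
	bch_journal_flush_seq_async(&c->journal, seq, &cl);
	closure_sync(&cl);
	ret = bch_journal_error(&c->journal);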
-rw-r--r--  drivers/md/bcache/alloc.c          24
-rw-r--r--  drivers/md/bcache/btree.c          28
-rw-r--r--  drivers/md/bcache/fs.c             29
-rw-r--r--  drivers/md/bcache/io.c             15
-rw-r--r--  drivers/md/bcache/journal.c        67
-rw-r--r--  drivers/md/bcache/journal.h        15
-rw-r--r--  drivers/md/bcache/journal_types.h   1
-rw-r--r--  drivers/md/bcache/request.c         6
-rw-r--r--  drivers/md/bcache/super.c          27
-rw-r--r--  drivers/md/bcache/sysfs.c          30
10 files changed, 145 insertions, 97 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 442a3575dc0b..0e001cdae0e4 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -234,13 +234,10 @@ static int prio_io(struct cache *ca, uint64_t bucket, int op)
return submit_bio_wait(ca->bio_prio);
}
-static void bch_prio_write(struct cache *ca)
+static int bch_prio_write(struct cache *ca)
{
struct cache_set *c = ca->set;
int i, ret;
- struct closure cl;
-
- closure_init_stack(&cl);
trace_bcache_prio_write_start(ca);
@@ -286,10 +283,12 @@ static void bch_prio_write(struct cache *ca)
ret = prio_io(ca, r, REQ_OP_WRITE);
if (bch_meta_write_fault("prio"))
ret = -EIO;
- if (ret)
+ if (ret) {
bch_cache_error(ca,
"IO error %d writing prios to bucket %lu",
ret, r);
+ return ret;
+ }
}
spin_lock(&c->journal.lock);
@@ -299,8 +298,12 @@ static void bch_prio_write(struct cache *ca)
c->journal.nr_prio_buckets);
spin_unlock(&c->journal.lock);
- bch_journal_meta(&c->journal, &cl);
- closure_sync(&cl);
+ ret = bch_journal_meta(&c->journal);
+ if (ret) {
+ __bch_cache_set_error(c,
+ "IO error %d journalling new prios", ret);
+ return ret;
+ }
/*
* Don't want the old priorities to get garbage collected until after we
@@ -320,6 +323,7 @@ static void bch_prio_write(struct cache *ca)
spin_unlock(&ca->prio_buckets_lock);
trace_bcache_prio_write_end(ca);
+ return 0;
}
int bch_prio_read(struct cache *ca)
@@ -764,6 +768,7 @@ static int bch_allocator_thread(void *arg)
{
struct cache *ca = arg;
struct cache_set *c = ca->set;
+ int ret;
while (1) {
/*
@@ -830,7 +835,10 @@ static int bch_allocator_thread(void *arg)
* free_inc is full of newly-invalidated buckets, must write out
* prios and gens before they can be re-used
*/
- bch_prio_write(ca);
+ ret = bch_prio_write(ca);
+ if (ret) {
+ /* XXX: need to stop the allocator thread */
+ }
}
out:
/*
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 51b521669052..1cc4635a1e1e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -913,7 +913,7 @@ void bch_btree_node_flush_journal_entries(struct cache_set *c,
u64 seq = b->keys.set[i].data->journal_seq;
if (seq) {
- bch_journal_push_seq(&c->journal, seq, cl);
+ bch_journal_flush_seq_async(&c->journal, seq, cl);
break;
}
}
@@ -1710,12 +1710,9 @@ static void bch_btree_set_root_initial(struct cache_set *c, struct btree *b)
* is nothing new to be done. This just guarantees that there is a
* journal write.
*/
-static void bch_btree_set_root(struct cache_set *c, struct btree *b)
+static int bch_btree_set_root(struct cache_set *c, struct btree *b)
{
struct btree *old;
- struct closure cl;
-
- closure_init_stack(&cl);
trace_bcache_btree_set_root(b);
BUG_ON(!b->written);
@@ -1743,8 +1740,7 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
* Ensure new btree root is persistent (reachable via the
* journal) before returning and the caller unlocking it:
*/
- bch_journal_meta(&c->journal, &cl);
- closure_sync(&cl);
+ return bch_journal_meta(&c->journal);
}
static struct btree *__bch_btree_node_alloc(struct cache_set *c,
@@ -2071,6 +2067,12 @@ int bch_btree_node_rewrite(struct btree *b, struct btree_iter *iter, bool wait)
bch_btree_node_write(n, &cl, NULL);
closure_sync(&cl);
+ if (bch_journal_error(&c->journal)) {
+ bch_btree_node_free_never_inserted(c, n);
+ six_unlock_intent(&n->lock);
+ return -EIO;
+ }
+
if (parent) {
ret = bch_btree_insert_node(parent, iter,
&keylist_single(&n->key),
@@ -2585,13 +2587,21 @@ static int btree_split(struct btree *b, struct btree_iter *iter,
/* Wait on journal flush and btree node writes: */
closure_sync(&state->stack_cl);
+ /* Check for journal error after waiting on the journal flush: */
+ if (bch_journal_error(&c->journal))
+ goto err;
+
/* New nodes all written, now make them visible: */
if (n3) {
- bch_btree_set_root(c, n3);
+ ret = bch_btree_set_root(c, n3);
+ if (ret)
+ goto err;
} else if (!parent) {
/* Root filled up but didn't need to be split */
- bch_btree_set_root(c, n1);
+ ret = bch_btree_set_root(c, n1);
+ if (ret)
+ goto err;
/* Drop key we ended up not using: */
bch_keylist_init(&state->parent_keys,
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index 7731c0409ce8..30660b003d75 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -668,11 +668,8 @@ static int bch_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct inode *inode = file->f_mapping->host;
struct bch_inode_info *ei = to_bch_ei(inode);
struct cache_set *c = inode->i_sb->s_fs_info;
- struct closure cl;
int ret;
- closure_init_stack(&cl);
-
/*
* We really just want to sync all the PageAppend pages:
*/
@@ -702,10 +699,10 @@ static int bch_fsync(struct file *file, loff_t start, loff_t end, int datasync)
out:
inode_unlock(inode);
- bch_journal_push_seq(&c->journal, ei->journal_seq, &cl);
- closure_sync(&cl);
+ if (ret)
+ return ret;
- return ret;
+ return bch_journal_flush_seq(&c->journal, ei->journal_seq);
}
/* Flags that are appropriate for non-directories/regular files. */
@@ -1756,13 +1753,8 @@ static int bch_write_inode(struct inode *inode, struct writeback_control *wbc)
ret = __bch_write_inode(inode);
mutex_unlock(&ei->update_lock);
- if (!ret && wbc->sync_mode == WB_SYNC_ALL) {
- struct closure cl;
-
- closure_init_stack(&cl);
- bch_journal_push_seq(&c->journal, ei->journal_seq, &cl);
- closure_sync(&cl);
- }
+ if (!ret && wbc->sync_mode == WB_SYNC_ALL)
+ ret = bch_journal_flush_seq(&c->journal, ei->journal_seq);
return ret;
}
@@ -1833,14 +1825,13 @@ static int bch_statfs(struct dentry *dentry, struct kstatfs *buf)
static int bch_sync_fs(struct super_block *sb, int wait)
{
struct cache_set *c = sb->s_fs_info;
- struct closure cl;
- closure_init_stack(&cl);
+ if (!wait) {
+ bch_journal_flush_async(&c->journal, NULL);
+ return 0;
+ }
- /* XXX: should only push a journal write if it's dirty */
- bch_journal_flush(&c->journal, wait ? &cl : NULL);
- closure_sync(&cl);
- return 0;
+ return bch_journal_flush(&c->journal);
}
static struct cache_set *bch_open_as_blockdevs(const char *_dev_name,
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 1beeb0cb5494..1a4736111b44 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -517,7 +517,7 @@ static void bch_write_done(struct closure *cl)
BUG_ON(!op->write_done);
if (!op->error && op->flush)
- bch_journal_push_seq(&op->c->journal, *op_journal_seq(op), cl);
+ op->error = bch_journal_error(&op->c->journal);
if (op->replace_collision) {
trace_bcache_promote_collision(&op->replace_info.key.k);
@@ -553,10 +553,17 @@ static void bch_write_index(struct closure *cl)
op->open_buckets[i] = NULL;
}
- if (op->write_done)
- continue_at_nobarrier(cl, bch_write_done, NULL);
- else
+ if (!op->write_done)
continue_at(cl, __bch_write, op->io_wq);
+
+ if (!op->error && op->flush) {
+ bch_journal_flush_seq_async(&op->c->journal,
+ *op_journal_seq(op),
+ cl);
+ continue_at(cl, bch_write_done, op->c->wq);
+ } else {
+ continue_at_nobarrier(cl, bch_write_done, NULL);
+ }
}
/**
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 770c36e57ae9..df373ee7a0b9 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -1321,8 +1321,7 @@ static void journal_write_endio(struct bio *bio)
struct journal_write *w = bio->bi_private;
if (bio->bi_error || bch_meta_write_fault("journal"))
- bch_cache_error(ca, "IO error %d writing journal",
- bio->bi_error);
+ set_bit(JOURNAL_ERROR, &ca->set->journal.flags);
closure_put(&w->j->io);
percpu_ref_put(&ca->ref);
@@ -1702,7 +1701,7 @@ void bch_journal_res_get(struct journal *j, struct journal_res *res,
bch_time_stats_update(&j->full_time, start_time);
}
-void bch_journal_push_seq(struct journal *j, u64 seq, struct closure *parent)
+void bch_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *parent)
{
spin_lock(&j->lock);
@@ -1724,7 +1723,33 @@ void bch_journal_push_seq(struct journal *j, u64 seq, struct closure *parent)
journal_unlock(j);
}
-void bch_journal_meta(struct journal *j, struct closure *parent)
+int bch_journal_flush_seq(struct journal *j, u64 seq)
+{
+ struct closure cl;
+
+ closure_init_stack(&cl);
+ bch_journal_flush_seq_async(j, seq, &cl);
+ closure_sync(&cl);
+
+ return bch_journal_error(j);
+}
+
+void bch_journal_meta_async(struct journal *j, struct closure *parent)
+{
+ struct journal_res res;
+ unsigned u64s = jset_u64s(0);
+ u64 seq;
+
+ memset(&res, 0, sizeof(res));
+
+ bch_journal_res_get(j, &res, u64s, u64s);
+ seq = j->seq;
+ bch_journal_res_put(j, &res);
+
+ bch_journal_flush_seq_async(j, seq, parent);
+}
+
+int bch_journal_meta(struct journal *j)
{
struct journal_res res;
unsigned u64s = jset_u64s(0);
@@ -1736,10 +1761,10 @@ void bch_journal_meta(struct journal *j, struct closure *parent)
seq = j->seq;
bch_journal_res_put(j, &res);
- bch_journal_push_seq(j, seq, parent);
+ return bch_journal_flush_seq(j, seq);
}
-void bch_journal_flush(struct journal *j, struct closure *parent)
+void bch_journal_flush_async(struct journal *j, struct closure *parent)
{
u64 seq;
@@ -1754,7 +1779,25 @@ void bch_journal_flush(struct journal *j, struct closure *parent)
}
spin_unlock(&j->lock);
- bch_journal_push_seq(j, seq, parent);
+ bch_journal_flush_seq_async(j, seq, parent);
+}
+
+int bch_journal_flush(struct journal *j)
+{
+ u64 seq;
+
+ spin_lock(&j->lock);
+ if (test_bit(JOURNAL_DIRTY, &j->flags)) {
+ seq = j->seq;
+ } else if (j->seq) {
+ seq = j->seq - 1;
+ } else {
+ spin_unlock(&j->lock);
+ return 0;
+ }
+ spin_unlock(&j->lock);
+
+ return bch_journal_flush_seq(j, seq);
}
void bch_journal_free(struct journal *j)
@@ -1875,15 +1918,12 @@ static bool bch_journal_writing_to_device(struct cache *ca)
int bch_journal_move(struct cache *ca)
{
- struct closure cl;
unsigned i, nr_buckets;
u64 last_flushed_seq;
struct cache_set *c = ca->set;
struct journal *j = &c->journal;
int ret = 0; /* Success */
- closure_init_stack(&cl);
-
if (bch_journal_writing_to_device(ca)) {
/*
* bch_journal_meta will write a record and we'll wait
@@ -1892,9 +1932,7 @@ int bch_journal_move(struct cache *ca)
* will call journal_next_bucket which notices that the
* device is no longer writeable, and picks a new one.
*/
- bch_journal_meta(j, &cl);
- /* Wait for the meta-data write */
- closure_sync(&cl);
+ bch_journal_meta(j);
BUG_ON(bch_journal_writing_to_device(ca));
}
@@ -1910,8 +1948,7 @@ int bch_journal_move(struct cache *ca)
* we have newer journal entries in devices other than ca,
* and wait for the meta data write to complete.
*/
- bch_journal_meta(j, &cl);
- closure_sync(&cl);
+ bch_journal_meta(j);
/*
* Verify that we no longer need any of the journal entries in
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index 6b7f6424152f..d7f1bb208921 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -201,9 +201,18 @@ void bch_journal_res_put(struct journal *, struct journal_res *);
void bch_journal_res_get(struct journal *, struct journal_res *,
unsigned, unsigned);
-void bch_journal_push_seq(struct journal *, u64, struct closure *);
-void bch_journal_meta(struct journal *, struct closure *);
-void bch_journal_flush(struct journal *, struct closure *);
+void bch_journal_flush_seq_async(struct journal *, u64, struct closure *);
+void bch_journal_flush_async(struct journal *, struct closure *);
+void bch_journal_meta_async(struct journal *, struct closure *);
+
+int bch_journal_flush_seq(struct journal *, u64);
+int bch_journal_flush(struct journal *);
+int bch_journal_meta(struct journal *);
+
+static inline int bch_journal_error(struct journal *j)
+{
+ return test_bit(JOURNAL_ERROR, &j->flags) ? -EIO : 0;
+}
/*
* Amount of space that will be taken up by some keys in the journal (i.e.
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
index 4a67da8737a4..4c1913431410 100644
--- a/drivers/md/bcache/journal_types.h
+++ b/drivers/md/bcache/journal_types.h
@@ -86,6 +86,7 @@ enum {
JOURNAL_IO_IN_FLIGHT,
JOURNAL_WRITE_IDX,
JOURNAL_REPLAY_DONE,
+ JOURNAL_ERROR,
};
/* Embedded in struct cache_set */
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 7d2145689482..e7429d89c5c7 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -582,7 +582,8 @@ static void __cached_dev_make_request(struct request_queue *q, struct bio *bio)
if (!bio->bi_iter.bi_size) {
if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- bch_journal_meta(&s->iop.c->journal, &s->cl);
+ bch_journal_flush_async(&s->iop.c->journal,
+ &s->cl);
/*
* If it's a flush, we send the flush to the backing
@@ -673,7 +674,8 @@ static void __blockdev_volume_make_request(struct request_queue *q,
s = search_alloc(bio, d);
if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- bch_journal_meta(&s->iop.c->journal, &s->cl);
+ bch_journal_flush_async(&s->iop.c->journal,
+ &s->cl);
continue_at(&s->cl, search_free, NULL);
} else if (rw) {
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 6c0c44274c1e..65eb3a258098 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -659,12 +659,9 @@ static void __bch_cache_read_only(struct cache *ca);
static void __bch_cache_set_read_only(struct cache_set *c)
{
- struct closure cl;
struct cache *ca;
unsigned i;
- closure_init_stack(&cl);
-
c->tiering_pd.rate.rate = UINT_MAX;
bch_ratelimit_reset(&c->tiering_pd.rate);
bch_tiering_read_stop(c);
@@ -681,8 +678,7 @@ static void __bch_cache_set_read_only(struct cache_set *c)
for_each_cache(ca, c, i)
bch_cache_allocator_stop(ca);
- bch_journal_flush(&c->journal, &cl);
- closure_sync(&cl);
+ bch_journal_flush(&c->journal);
cancel_delayed_work_sync(&c->journal.write_work);
}
@@ -1137,15 +1133,12 @@ static const char *run_cache_set(struct cache_set *c)
const char *err = "cannot allocate memory";
struct cache_member_rcu *mi;
struct cache *ca;
- struct closure cl;
unsigned i, id;
long now;
lockdep_assert_held(&bch_register_lock);
BUG_ON(test_bit(CACHE_SET_RUNNING, &c->flags));
- closure_init_stack(&cl);
-
/* We don't want bch_cache_set_error() to free underneath us */
closure_get(&c->caching);
@@ -1244,6 +1237,9 @@ static const char *run_cache_set(struct cache_set *c)
bch_verify_inode_refs(c);
} else {
struct bkey_i_inode inode;
+ struct closure cl;
+
+ closure_init_stack(&cl);
pr_notice("invalidating existing data");
@@ -1272,8 +1268,10 @@ static const char *run_cache_set(struct cache_set *c)
err = "cannot allocate new btree root";
for (id = 0; id < BTREE_ID_NR; id++)
- if (bch_btree_root_alloc(c, id, &cl))
+ if (bch_btree_root_alloc(c, id, &cl)) {
+ closure_sync(&cl);
goto err;
+ }
/* Wait for new btree roots to be written: */
closure_sync(&cl);
@@ -1289,8 +1287,9 @@ static const char *run_cache_set(struct cache_set *c)
NULL, NULL, 0))
goto err;
- bch_journal_meta(&c->journal, &cl);
- closure_sync(&cl);
+ err = "error writing first journal entry";
+ if (bch_journal_meta(&c->journal))
+ goto err;
/* Mark cache set as initialized: */
SET_CACHE_SYNC(&c->sb, true);
@@ -1332,7 +1331,6 @@ static const char *run_cache_set(struct cache_set *c)
return NULL;
err:
- closure_sync(&cl);
bch_cache_set_unregister(c);
closure_put(&c->caching);
return err;
@@ -1663,9 +1661,7 @@ static void bch_cache_remove_work(struct work_struct *work)
char name[BDEVNAME_SIZE];
bool force = test_bit(CACHE_DEV_FORCE_REMOVE, &ca->flags);
unsigned dev = ca->sb.nr_this_dev;
- struct closure cl;
- closure_init_stack(&cl);
bdevname(ca->disk_sb.bdev, name);
/*
@@ -1714,8 +1710,7 @@ static void bch_cache_remove_work(struct work_struct *work)
c->journal.prio_buckets[dev] = 0;
spin_unlock(&c->journal.lock);
- bch_journal_meta(&c->journal, &cl);
- closure_sync(&cl);
+ bch_journal_meta(&c->journal);
/*
* Stop device before removing it from the cache set's list of devices -
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 90c0078d7fd7..0a4e2d945bf0 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -247,9 +247,6 @@ STORE(__cached_dev)
unsigned v = size;
struct cache_set *c;
struct kobj_uevent_env *env;
- struct closure cl;
-
- closure_init_stack(&cl);
#define d_strtoul(var) sysfs_strtoul(var, dc->var)
#define d_strtoul_nonzero(var) sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
@@ -315,9 +312,10 @@ STORE(__cached_dev)
return ret;
if (dc->disk.c)
- bch_journal_push_seq(&dc->disk.c->journal,
- journal_seq, &cl);
- closure_sync(&cl);
+ ret = bch_journal_flush_seq(&dc->disk.c->journal,
+ journal_seq);
+ if (ret)
+ return ret;
env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
if (!env)
@@ -424,9 +422,6 @@ STORE(__bch_blockdev_volume)
{
struct bcache_device *d = container_of(kobj, struct bcache_device,
kobj);
- struct closure cl;
-
- closure_init_stack(&cl);
sysfs_strtoul(data_csum, d->data_csum);
@@ -453,8 +448,9 @@ STORE(__bch_blockdev_volume)
if (ret)
return ret;
- bch_journal_push_seq(&d->c->journal, journal_seq, &cl);
- closure_sync(&cl);
+ ret = bch_journal_flush_seq(&d->c->journal, journal_seq);
+ if (ret)
+ return ret;
set_capacity(d->disk, d->inode.v.i_inode.i_size >> 9);
}
@@ -470,11 +466,7 @@ STORE(__bch_blockdev_volume)
mutex_unlock(&d->inode_lock);
- if (ret)
- return ret;
-
- bch_journal_push_seq(&d->c->journal, journal_seq, &cl);
- closure_sync(&cl);
+ return ret ?: bch_journal_flush_seq(&d->c->journal, journal_seq);
}
if (attr == &sysfs_unregister) {
@@ -845,11 +837,7 @@ STORE(__bch_cache_set)
sysfs_strtoul(btree_flush_delay, c->btree_flush_delay);
if (attr == &sysfs_journal_flush) {
- struct closure cl;
-
- closure_init_stack(&cl);
- bch_journal_meta(&c->journal, &cl);
- closure_sync(&cl);
+ bch_journal_meta_async(&c->journal, NULL);
return size;
}