bcachefs: prevent reordering in write path

author: Kent Overstreet <kent.overstreet@gmail.com> 2018-03-29 14:22:06 -0400
committer: Kent Overstreet <kent.overstreet@gmail.com> 2018-04-04 11:40:56 -0400
commit: 17d15dd6ae966cbbd59190054496b771d96cefd5 (patch)
tree: d16647b4605c4a4121b123ce0049827bd13aed38
parent: 00a13ba01fe9d0d8d1ab9c8e932fef4172f08bed (diff)
2 files changed, 28 insertions, 62 deletions
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index fbed5361e25c..4d791fd9ae51 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -196,8 +196,6 @@ static void bch2_write_done(struct closure *cl)
 {
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 
-	BUG_ON(!(op->flags & BCH_WRITE_DONE));
-
 	if (!op->error && (op->flags & BCH_WRITE_FLUSH))
 		op->error = bch2_journal_error(&op->c->journal);
 
@@ -205,7 +203,6 @@ static void bch2_write_done(struct closure *cl)
 		bch2_disk_reservation_put(op->c, &op->res);
 	percpu_ref_put(&op->c->writes);
 	bch2_keylist_free(&op->insert_keys, op->inline_keys);
-	op->flags &= ~(BCH_WRITE_DONE|BCH_WRITE_LOOPED);
 
 	closure_return(cl);
 }
@@ -232,9 +229,8 @@ int bch2_write_index_default(struct bch_write_op *op)
 /**
  * bch_write_index - after a write, update index to point to new data
  */
-static void bch2_write_index(struct closure *cl)
+static void __bch2_write_index(struct bch_write_op *op)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 	struct bch_fs *c = op->c;
 	struct keylist *keys = &op->insert_keys;
 	struct bkey_s_extent e;
@@ -242,8 +238,6 @@ static void bch2_write_index(struct closure *cl)
 	struct bkey_i *src, *dst = keys->keys, *n, *k;
 	int ret;
 
-	op->flags |= BCH_WRITE_LOOPED;
-
 	for (src = keys->keys; src != keys->top; src = n) {
 		n = bkey_next(src);
 		bkey_copy(dst, src);
@@ -292,9 +286,19 @@ static void bch2_write_index(struct closure *cl)
 	}
 out:
 	bch2_open_bucket_put_refs(c, &op->open_buckets_nr, op->open_buckets);
+	return;
+err:
+	keys->top = keys->keys;
+	op->error = ret;
+	goto out;
+}
 
-	if (!(op->flags & BCH_WRITE_DONE))
-		continue_at(cl, __bch2_write, op->io_wq);
+static void bch2_write_index(struct closure *cl)
+{
+	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	struct bch_fs *c = op->c;
+
+	__bch2_write_index(op);
 
 	if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
 		bch2_journal_flush_seq_async(&c->journal,
@@ -304,12 +308,6 @@ out:
 	} else {
 		continue_at_nobarrier(cl, bch2_write_done, NULL);
 	}
-	return;
-err:
-	keys->top = keys->keys;
-	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;
-	goto out;
 }
 
 static void bch2_write_endio(struct bio *bio)
@@ -730,18 +728,18 @@ static void __bch2_write(struct closure *cl)
 	struct bch_fs *c = op->c;
 	struct write_point *wp;
 	int ret;
-
+again:
 	do {
 		/* +1 for possible cache device: */
 		if (op->open_buckets_nr + op->nr_replicas + 1 >
 		    ARRAY_SIZE(op->open_buckets))
-			continue_at(cl, bch2_write_index, index_update_wq(op));
+			goto flush_io;
 
 		if (bch2_keylist_realloc(&op->insert_keys,
 					op->inline_keys,
 					ARRAY_SIZE(op->inline_keys),
 					BKEY_EXTENT_U64s_MAX))
-			continue_at(cl, bch2_write_index, index_update_wq(op));
+			goto flush_io;
 
 		wp = bch2_alloc_sectors_start(c,
 			op->target,
@@ -760,33 +758,7 @@ static void __bch2_write(struct closure *cl)
 				goto err;
 			}
 
-			/*
-			 * If we already have some keys, must insert them first
-			 * before allocating another open bucket. We only hit
-			 * this case if open_bucket_nr > 1.
-			 */
-			if (!bch2_keylist_empty(&op->insert_keys))
-				continue_at(cl, bch2_write_index,
-					    index_update_wq(op));
-
-			/*
-			 * If we've looped, we're running out of a workqueue -
-			 * not the bch2_write() caller's context - and we don't
-			 * want to block the workqueue:
-			 */
-			if (op->flags & BCH_WRITE_LOOPED)
-				continue_at(cl, __bch2_write, op->io_wq);
-
-			/*
-			 * Otherwise, we do want to block the caller on alloc
-			 * failure instead of letting it queue up more and more
-			 * writes:
-			 * XXX: this technically needs a try_to_freeze() -
-			 * except that that's not safe because caller may have
-			 * issued other IO... hmm..
-			 */
-			closure_sync(cl);
-			continue;
+			goto flush_io;
 		}
 
 		ret = bch2_write_extent(op, wp);
@@ -802,28 +774,24 @@ static void __bch2_write(struct closure *cl)
 			goto err;
 	} while (ret);
 
-	op->flags |= BCH_WRITE_DONE;
 	continue_at(cl, bch2_write_index, index_update_wq(op));
 err:
-	/*
-	 * Right now we can only error here if we went RO - the
-	 * allocation failed, but we already checked for -ENOSPC when we
-	 * got our reservation.
-	 *
-	 * XXX capacity might have changed, but we don't check for that
-	 * yet:
-	 */
 	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;
 
-	/*
-	 * No reason not to insert keys for whatever data was successfully
-	 * written (especially for a cmpxchg operation that's moving data
-	 * around)
-	 */
 	continue_at(cl, !bch2_keylist_empty(&op->insert_keys)
 		    ? bch2_write_index
 		    : bch2_write_done, index_update_wq(op));
+flush_io:
+	closure_sync(cl);
+
+	if (!bch2_keylist_empty(&op->insert_keys)) {
+		__bch2_write_index(op);
+
+		if (op->error)
+			continue_at_nobarrier(cl, bch2_write_done, NULL);
+	}
+
+	goto again;
 }
 
 /**
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index bf0b17e1deb9..a0c795abe9bd 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -36,8 +36,6 @@ enum bch_write_flags {
 
 	/* Internal: */
 	BCH_WRITE_JOURNAL_SEQ_PTR	= (1 << 9),
-	BCH_WRITE_DONE			= (1 << 10),
-	BCH_WRITE_LOOPED		= (1 << 11),
 };
 
 static inline u64 *op_journal_seq(struct bch_write_op *op)
author	Kent Overstreet <kent.overstreet@gmail.com>	2018-03-29 14:22:06 -0400
committer	Kent Overstreet <kent.overstreet@gmail.com>	2018-04-04 11:40:56 -0400
commit	17d15dd6ae966cbbd59190054496b771d96cefd5 (patch)
tree	d16647b4605c4a4121b123ce0049827bd13aed38
parent	00a13ba01fe9d0d8d1ab9c8e932fef4172f08bed (diff)