bcache: add flag to superblock indicating if we shutdown cleanly

author: Kent Overstreet <kent.overstreet@gmail.com> 2016-09-23 16:20:48 -0800
committer: Kent Overstreet <kent.overstreet@gmail.com> 2016-10-07 12:37:11 -0800
commit: 615c4e27cf9526788c4f8055d402ebeec0af6edb (patch)
tree: 0bf1df03eab74f27e0f1989547f335609af1f7db
parent: a9d16e284cd02ad382e51cfa0bd62d167d0a0bbf (diff)
3 files changed, 74 insertions, 26 deletions
diff --git a/drivers/md/bcache/fs-gc.c b/drivers/md/bcache/fs-gc.c
index 643143629495..e68d35026051 100644
--- a/drivers/md/bcache/fs-gc.c
+++ b/drivers/md/bcache/fs-gc.c
@@ -105,7 +105,6 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum)
 static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
 			   struct bkey_s_c_inode inode, struct nlink link)
 {
-	struct bkey_i_inode update;
 	u16 i_mode  = le16_to_cpu(inode.v->i_mode);
 	u32 i_flags = le32_to_cpu(inode.v->i_flags);
 	u32 i_nlink = le32_to_cpu(inode.v->i_nlink);
@@ -119,9 +118,15 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
 			 mode_to_type(i_mode));
 
 	if (!link.count) {
+		cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+				"filesystem marked clean, "
+				"but found orphaned inode %llu",
+				inode.k->p.inode);
+
 		cache_set_inconsistent_on(S_ISDIR(i_mode) &&
 			bch_empty_dir(c, inode.k->p.inode), c,
-			"non empty directory with link count 0,inode nlink %u, dir links found %u",
+			"non empty directory with link count 0, "
+			"inode nlink %u, dir links found %u",
 			i_nlink, link.dir_count);
 
 		bch_verbose(c, "deleting inum %llu", inode.k->p.inode);
@@ -133,6 +138,11 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
 	}
 
 	if (i_flags & BCH_INODE_I_SIZE_DIRTY) {
+		cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+				"filesystem marked clean, "
+				"but inode %llu has i_size dirty",
+				inode.k->p.inode);
+
 		bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
 
 		/*
@@ -144,7 +154,8 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
 				round_up(i_size, PAGE_SIZE) >> 9,
 				NULL, NULL);
 		if (ret) {
-			bch_err(c, "error in fs gc: error %i while truncating inode", ret);
+			bch_err(c, "error in fs gc: error %i "
+				"truncating inode", ret);
 			return ret;
 		}
 
@@ -156,23 +167,40 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
 	}
 
 	if (i_flags & BCH_INODE_I_SECTORS_DIRTY) {
+		cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+				"filesystem marked clean, "
+				"but inode %llu has i_sectors dirty",
+				inode.k->p.inode);
+
 		bch_verbose(c, "recounting sectors for inode %llu", inode.k->p.inode);
 
 		i_sectors = bch_count_inode_sectors(c, inode.k->p.inode);
 		if (i_sectors < 0) {
-			bch_err(c, "error in fs gc: error %i recounting inode sectors",
+			bch_err(c, "error in fs gc: error %i "
+				"recounting inode sectors",
 				(int) i_sectors);
 			return i_sectors;
 		}
 	}
 
+	if (i_nlink != link.count + link.dir_count) {
+		cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+				"filesystem marked clean, "
+				"but inode %llu has wrong i_nlink "
+				"(type %u i_nlink %u, should be %u)",
+				inode.k->p.inode,
+				mode_to_type(i_mode), i_nlink,
+				link.count + link.dir_count);
+
+		bch_verbose(c, "setting inum %llu nlinks from %u to %u",
+			    inode.k->p.inode, i_nlink,
+			    link.count + link.dir_count);
+	}
+
 	if (i_nlink != link.count + link.dir_count ||
 	    i_flags & BCH_INODE_I_SECTORS_DIRTY ||
 	    i_flags & BCH_INODE_I_SIZE_DIRTY) {
-		if (i_nlink != link.count + link.dir_count)
-			bch_verbose(c, "setting inum %llu nlinks from %u to %u",
-				 inode.k->p.inode, i_nlink,
-				 link.count + link.dir_count);
+		struct bkey_i_inode update;
 
 		bkey_reassemble(&update.k_i, inode.s_c);
 		update.v.i_nlink = cpu_to_le32(link.count + link.dir_count);
@@ -186,7 +214,8 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
 					  BTREE_INSERT_NOFAIL,
 					  BTREE_INSERT_ENTRY(iter, &update.k_i));
 		if (ret && ret != -EINTR)
-			bch_err(c, "error in fs gc: error %i while updating inode", ret);
+			bch_err(c, "error in fs gc: error %i "
+				"updating inode", ret);
 	}
 
 	return ret;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f2bbbcfe3691..f2fb9567b4b5 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -699,6 +699,9 @@ static void __bch_cache_set_read_only(struct cache_set *c)
 
 	bch_btree_flush(c);
 
+	for_each_cache(ca, c, i)
+		bch_cache_allocator_stop(ca);
+
 	/*
 	 * Write a journal entry after flushing the btree, so we don't end up
 	 * replaying everything we just flushed:
@@ -706,9 +709,6 @@ static void __bch_cache_set_read_only(struct cache_set *c)
 	if (test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags))
 		bch_journal_meta(&c->journal);
 
-	for_each_cache(ca, c, i)
-		bch_cache_allocator_stop(ca);
-
 	cancel_delayed_work_sync(&c->journal.write_work);
 }
 
@@ -733,22 +733,34 @@ static void bch_cache_set_read_only_work(struct work_struct *work)
 	c->foreground_write_pd.rate.rate = UINT_MAX;
 	bch_wake_delayed_writes((unsigned long) c);
 
-	/*
-	 * If we're not doing an emergency shutdown, we want to wait on
-	 * outstanding writes to complete so they don't see spurious errors due
-	 * to shutting down the allocator.
-	 *
-	 * If we are doing an emergency shutdown, outstanding writes may hang
-	 * until we shutdown the allocator, so we don't want to wait here:
-	 */
-	wait_event(bch_read_only_wait,
-		   test_bit(CACHE_SET_EMERGENCY_RO, &c->flags) ||
-		   test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+	if (!test_bit(CACHE_SET_EMERGENCY_RO, &c->flags)) {
+		/*
+		 * If we're not doing an emergency shutdown, we want to wait on
+		 * outstanding writes to complete so they don't see spurious
+		 * errors due to shutting down the allocator:
+		 */
+		wait_event(bch_read_only_wait,
+			   test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
 
-	__bch_cache_set_read_only(c);
+		__bch_cache_set_read_only(c);
 
-	wait_event(bch_read_only_wait,
-		   test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+		if (!bch_journal_error(&c->journal)) {
+			SET_CACHE_SET_CLEAN(&c->disk_sb, true);
+			bcache_write_super(c);
+		}
+	} else {
+		/*
+		 * If we are doing an emergency shutdown, outstanding writes
+		 * may hang until we shut down the allocator, so we don't want
+		 * to wait on outstanding writes before shutting everything
+		 * down - but we do need to wait on them before returning and
+		 * signalling that going RO is complete:
+		 */
+		__bch_cache_set_read_only(c);
+
+		wait_event(bch_read_only_wait,
+			   test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+	}
 
 	bch_notify_cache_set_read_only(c);
 	trace_bcache_cache_set_read_only_done(c);
@@ -1424,6 +1436,7 @@ static const char *run_cache_set(struct cache_set *c)
 		c->disk_mi[ca->sb.nr_this_dev].last_mount = cpu_to_le64(now);
 	rcu_read_unlock();
 
+	SET_CACHE_SET_CLEAN(&c->disk_sb, false);
 	bcache_write_super(c);
 
 	err = "dynamic fault";
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 6006d50418cf..999ed8f4c535 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -790,6 +790,12 @@ LE64_BITMASK(CACHE_SET_GC_RESERVE,	struct cache_sb, flags, 57, 63);
 
 LE64_BITMASK(CACHE_SET_ROOT_RESERVE,	struct cache_sb, flags2, 0,  6);
 
+/*
+ * Did we shut down cleanly? Just a hint, doesn't affect behaviour of
+ * mount/recovery path:
+ */
+LE64_BITMASK(CACHE_SET_CLEAN,		struct cache_sb, flags2, 6, 7);
+
 /* options: */
 
 /**
author	Kent Overstreet <kent.overstreet@gmail.com>	2016-09-23 16:20:48 -0800
committer	Kent Overstreet <kent.overstreet@gmail.com>	2016-10-07 12:37:11 -0800
commit	615c4e27cf9526788c4f8055d402ebeec0af6edb (patch)
tree	0bf1df03eab74f27e0f1989547f335609af1f7db
parent	a9d16e284cd02ad382e51cfa0bd62d167d0a0bbf (diff)