| author    | Kent Overstreet <kent.overstreet@gmail.com> | 2016-12-12 12:53:56 -0900 |
|-----------|---------------------------------------------|---------------------------|
| committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-01-18 21:41:26 -0900 |
| commit    | 019162ee32535849042f7482e7cd57c7263864bd (patch) | |
| tree      | 50e0b62048d0ed806328355c9e20429059e577cf | |
| parent    | 6584f775bbc01fd14455272afafcd8c45e5b9801 (diff) | |
bcache: short circuit bch_prio_write() and journal flush when possible
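
The change, in brief: rather than always invalidating buckets on the free_inc refill path (which bumps the bucket generation and therefore forces a prio/gen rewrite and a journal flush), the allocator first scans for buckets that are already completely empty and whose last modification has already reached the journal on disk; those can be pushed straight onto free_inc. Below is a minimal sketch of that reuse test, modelled on bch_find_empty_buckets() in the diff further down. The struct layout and is_available_bucket() here are simplified stand-ins for illustration, not the kernel definitions; only the shape of the check is taken from the patch.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Simplified stand-in for struct bucket_mark; only the fields the reuse
 * check looks at.  The real mark is a packed 64-bit word (see the
 * buckets_types.h hunk below).
 */
struct bucket_mark {
	bool	 owned_by_allocator;
	bool	 is_metadata;
	bool	 wait_on_journal;
	uint16_t cached_sectors;
	uint16_t dirty_sectors;
	uint16_t journal_seq;	/* low bits of the last journal seq that touched it */
};

/* Stand-in for is_available_bucket(); the exact kernel definition is not
 * shown in this diff. */
static bool is_available_bucket(struct bucket_mark m)
{
	return !m.owned_by_allocator && !m.is_metadata && !m.dirty_sectors;
}

/*
 * Can the bucket be reused without invalidating it, i.e. without bumping its
 * gen and therefore without a prio write or journal flush?  It must hold no
 * data at all, and any journal entry that dirtied it must already be on disk.
 */
static bool bucket_reusable(struct bucket_mark m, uint16_t last_seq_ondisk)
{
	return is_available_bucket(m) &&
	       !m.cached_sectors &&
	       (!m.wait_on_journal ||
		(int16_t) last_seq_ondisk - (int16_t) m.journal_seq >= 0);
}

int main(void)
{
	struct bucket_mark m = { .wait_on_journal = true, .journal_seq = 12 };

	/* Reusable once the journal has flushed up to (or past) seq 12. */
	printf("%d %d\n", bucket_reusable(m, 10), bucket_reusable(m, 12));
	return 0;
}
```

In the patched allocator thread this scan runs before the invalidate path; if it fills more than half of free_inc, the thread drops gc_lock and skips invalidation (and with it the prio write and journal flush) entirely.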
-rw-r--r-- | drivers/md/bcache/alloc.c         | 130 |
-rw-r--r-- | drivers/md/bcache/alloc.h         |   1 |
-rw-r--r-- | drivers/md/bcache/bcache.h        |   6 |
-rw-r--r-- | drivers/md/bcache/btree_gc.c      |  22 |
-rw-r--r-- | drivers/md/bcache/btree_update.c  |   8 |
-rw-r--r-- | drivers/md/bcache/buckets.c       | 196 |
-rw-r--r-- | drivers/md/bcache/buckets.h       |  38 |
-rw-r--r-- | drivers/md/bcache/buckets_types.h |  28 |
-rw-r--r-- | drivers/md/bcache/extents.c       | 120 |
-rw-r--r-- | drivers/md/bcache/journal.c       |   5 |
-rw-r--r-- | drivers/md/bcache/movinggc.c      |  34 |
-rw-r--r-- | drivers/md/bcache/super.c         |  14 |
12 files changed, 318 insertions, 284 deletions
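
Throughout the patch, per-bucket state that used to live in separate fields (bucket_gens[], copygc_gen, the old 32-bit mark) is folded into a single 64-bit bucket_mark, and updates go through the new bucket_cmpxchg() macro in buckets.h so that gen bumps, sector counts and the journal_seq/wait_on_journal bits change atomically as one word. Here is a rough userspace model of that pattern, using C11 atomics in place of the kernel's cmpxchg() and an illustrative field layout (the real struct uses bitfields and different widths); it is a sketch of the idiom, not the kernel code.

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Simplified stand-in for struct bucket_mark: everything packed into one
 * 64-bit word so the whole mark can be read and updated atomically.
 */
struct bucket_mark {
	union {
		uint64_t counter;
		struct {
			uint8_t  gen;
			uint8_t  owned_by_allocator;
			uint16_t cached_sectors;
			uint16_t dirty_sectors;
			uint16_t journal_seq;
		};
	};
};

struct bucket {
	_Atomic uint64_t packed;	/* plays the role of bucket->_mark.counter */
};

/*
 * Retry loop in the same shape as the patch's bucket_cmpxchg() macro: read
 * the old packed mark, apply @expr to a copy, and publish it only if nothing
 * changed underneath us; evaluates to the old mark.
 */
#define bucket_cmpxchg(b, new, expr)					\
({									\
	uint64_t _v = atomic_load(&(b)->packed);			\
	struct bucket_mark _old;					\
									\
	do {								\
		(new).counter = _old.counter = _v;			\
		expr;							\
	} while (!atomic_compare_exchange_weak(&(b)->packed, &_v,	\
					       (new).counter));		\
	_old;								\
})

int main(void)
{
	struct bucket b = { .packed = 0 };
	struct bucket_mark new = { 0 };

	/* Invalidate the bucket: take ownership, bump the gen and clear the
	 * sector counts in a single atomic step, as bch_invalidate_bucket()
	 * does in the patch. */
	struct bucket_mark old = bucket_cmpxchg(&b, new, ({
		new.owned_by_allocator = 1;
		new.cached_sectors = 0;
		new.dirty_sectors = 0;
		new.gen++;
	}));

	printf("gen %d -> %d\n", old.gen, new.gen);
	return 0;
}
```

The statement-expression form mirrors the kernel macro and needs GCC/Clang; the real cmpxchg() returns the old value rather than a success flag, but the retry structure is the same.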
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ac40a96b381e..0887ed3eccd1 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -264,7 +264,7 @@ static int bch_prio_write(struct cache *ca) g = ca->buckets + r; d->read_prio = cpu_to_le16(g->read_prio); d->write_prio = cpu_to_le16(g->write_prio); - d->gen = ca->bucket_gens[r]; + d->gen = ca->buckets[r].mark.gen; } p->next_bucket = cpu_to_le64(ca->prio_buckets[i + 1]); @@ -343,6 +343,7 @@ int bch_prio_read(struct cache *ca) struct cache_set *c = ca->set; struct prio_set *p = ca->disk_buckets; struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; + struct bucket_mark new; unsigned bucket_nr = 0; u64 bucket, expect, got; size_t b; @@ -398,8 +399,8 @@ int bch_prio_read(struct cache *ca) ca->buckets[b].read_prio = le16_to_cpu(d->read_prio); ca->buckets[b].write_prio = le16_to_cpu(d->write_prio); - ca->buckets[b].oldest_gen = d->gen; - ca->bucket_gens[b] = d->gen; + + bucket_cmpxchg(&ca->buckets[b], new, new.gen = d->gen); } return 0; @@ -586,31 +587,18 @@ static bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *g) return can_inc_bucket_gen(ca, g); } -static void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *g) +static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *g) { - lockdep_assert_held(&ca->freelist_lock); - - /* Ordering matters: see bch_mark_data_bucket() */ + spin_lock(&ca->freelist_lock); - /* bucket mark updates imply a write barrier */ - bch_mark_alloc_bucket(ca, g); + bch_invalidate_bucket(ca, g); g->read_prio = ca->set->prio_clock[READ].hand; g->write_prio = ca->set->prio_clock[WRITE].hand; - g->copygc_gen = 0; verify_not_on_freelist(ca, g - ca->buckets); -} - -static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *g) -{ - spin_lock(&ca->freelist_lock); - - /* this is what makes ptrs to the bucket invalid */ - ca->bucket_gens[g - ca->buckets]++; - - __bch_invalidate_one_bucket(ca, g); BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets)); + spin_unlock(&ca->freelist_lock); } @@ -779,6 +767,35 @@ static bool bch_allocator_push(struct cache *ca, long bucket) return ret; } +static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca) +{ + u16 last_seq_ondisk = c->journal.last_seq_ondisk; + struct bucket *g; + + for_each_bucket(g, ca) { + struct bucket_mark m = READ_ONCE(g->mark); + + if (is_available_bucket(m) && + !m.cached_sectors && + (!m.wait_on_journal || + ((s16) last_seq_ondisk - (s16) m.journal_seq >= 0))) { + spin_lock(&ca->freelist_lock); + + bch_mark_alloc_bucket(ca, g, true); + g->read_prio = ca->set->prio_clock[READ].hand; + g->write_prio = ca->set->prio_clock[WRITE].hand; + + verify_not_on_freelist(ca, g - ca->buckets); + BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets)); + + spin_unlock(&ca->freelist_lock); + + if (fifo_full(&ca->free_inc)) + break; + } + } +} + /** * bch_allocator_thread - move buckets from free_inc to reserves * @@ -833,10 +850,21 @@ static int bch_allocator_thread(void *arg) __set_current_state(TASK_RUNNING); } - /* We've run out of free buckets! */ - down_read(&c->gc_lock); + /* + * See if we have buckets we can reuse without invalidating them + * or forcing a journal commit: + */ + bch_find_empty_buckets(c, ca); + + if (fifo_used(&ca->free_inc) * 2 > ca->free_inc.size) { + up_read(&c->gc_lock); + continue; + } + + /* We've run out of free buckets! 
*/ + while (!fifo_full(&ca->free_inc)) { if (wait_buckets_available(ca)) { up_read(&c->gc_lock); @@ -1044,7 +1072,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c, ob->nr_ptrs * sizeof(ob->ptr_offset[0])); ob->nr_ptrs++; ob->ptrs[0] = (struct bch_extent_ptr) { - .gen = ca->bucket_gens[bucket], + .gen = ca->buckets[bucket].mark.gen, .offset = bucket_to_sector(ca, bucket), .dev = ca->sb.nr_this_dev, }; @@ -1155,7 +1183,7 @@ static void __bch_open_bucket_put(struct cache_set *c, struct open_bucket *ob) rcu_read_lock(); open_bucket_for_each_online_device(c, ob, ptr, ca) - bch_unmark_open_bucket(ca, PTR_BUCKET(ca, ptr)); + bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), false); rcu_read_unlock(); ob->nr_ptrs = 0; @@ -1746,8 +1774,6 @@ int bch_cache_allocator_start(struct cache *ca) /* * allocator thread already started? - * (run_cache_set() starts allocator separately from normal rw path, via - * bch_cache_allocator_start_once()) */ if (ca->alloc_thread) return 0; @@ -1773,58 +1799,6 @@ int bch_cache_allocator_start(struct cache *ca) return 0; } -/* - * bch_cache_allocator_start - fill freelists directly with completely unused - * buckets - * - * The allocator thread needs freed buckets to rewrite the prios and gens, and - * it needs to rewrite prios and gens in order to free buckets. - * - * Don't increment gens. We are only re-using completely free buckets here, so - * there are no existing pointers into them. - * - * Also, we can't increment gens until we re-write prios and gens, but we - * can't do that until we can write a journal entry. - * - * If the journal is completely full, we cannot write a journal entry until we - * reclaim a journal bucket, and we cannot do that until we possibly allocate - * some buckets for btree nodes. - * - * So dig ourselves out of that hole here. - * - * This is only safe for buckets that have no live data in them, which there - * should always be some of when this function is called, since the last time - * we shut down there should have been unused buckets stranded on freelists. 
- */ -const char *bch_cache_allocator_start_once(struct cache *ca) -{ - struct bucket *g; - - spin_lock(&ca->freelist_lock); - for_each_bucket(g, ca) { - if (fifo_full(&ca->free[RESERVE_NONE])) - break; - - if (bch_can_invalidate_bucket(ca, g) && - !g->mark.cached_sectors) { - __bch_invalidate_one_bucket(ca, g); - BUG_ON(!__bch_allocator_push(ca, g - ca->buckets)); - } - } - spin_unlock(&ca->freelist_lock); - - if (cache_set_init_fault("alloc_start")) - return "dynamic fault"; - - if (!fifo_full(&ca->free[RESERVE_PRIO])) - return "couldn't find enough available buckets to write prios"; - - if (bch_cache_allocator_start(ca)) - return "error starting allocator thread"; - - return NULL; -} - void bch_open_buckets_init(struct cache_set *c) { unsigned i; diff --git a/drivers/md/bcache/alloc.h b/drivers/md/bcache/alloc.h index c39f43e02e80..46dbad44567a 100644 --- a/drivers/md/bcache/alloc.h +++ b/drivers/md/bcache/alloc.h @@ -91,7 +91,6 @@ static inline struct cache *cache_group_next(struct cache_group *devs, void bch_cache_allocator_stop(struct cache *); int bch_cache_allocator_start(struct cache *); -const char *bch_cache_allocator_start_once(struct cache *); void bch_open_buckets_init(struct cache_set *); #endif /* _BCACHE_ALLOC_H */ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index e0c8716172e0..3540e05e40ef 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -407,7 +407,9 @@ struct cache { size_t fifo_last_bucket; /* Allocation stuff: */ - u8 *bucket_gens; + + /* most out of date gen in the btree */ + u8 *oldest_gens; struct bucket *buckets; unsigned short bucket_bits; /* ilog2(bucket_size) */ @@ -763,6 +765,8 @@ struct cache_set { struct journal journal; + unsigned bucket_journal_seq; + /* CACHING OTHER BLOCK DEVICES */ mempool_t search; struct radix_tree_root devices; diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c index e34445da10f5..a00785189ffe 100644 --- a/drivers/md/bcache/btree_gc.c +++ b/drivers/md/bcache/btree_gc.c @@ -102,10 +102,10 @@ u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c, struct bkey_s_c k) rcu_read_lock(); extent_for_each_online_device(c, e, ptr, ca) { - struct bucket *g = PTR_BUCKET(ca, ptr); + size_t b = PTR_BUCKET_NR(ca, ptr); - if (__gen_after(g->oldest_gen, ptr->gen)) - g->oldest_gen = ptr->gen; + if (__gen_after(ca->oldest_gens[b], ptr->gen)) + ca->oldest_gens[b] = ptr->gen; max_stale = max(max_stale, ptr_stale(ca, ptr)); } @@ -237,11 +237,11 @@ static void bch_mark_allocator_buckets(struct cache_set *c) spin_lock(&ca->freelist_lock); fifo_for_each_entry(i, &ca->free_inc, iter) - bch_mark_alloc_bucket(ca, &ca->buckets[i]); + bch_mark_alloc_bucket(ca, &ca->buckets[i], true); for (j = 0; j < RESERVE_NR; j++) fifo_for_each_entry(i, &ca->free[j], iter) - bch_mark_alloc_bucket(ca, &ca->buckets[i]); + bch_mark_alloc_bucket(ca, &ca->buckets[i], true); spin_unlock(&ca->freelist_lock); } @@ -254,7 +254,7 @@ static void bch_mark_allocator_buckets(struct cache_set *c) mutex_lock(&ob->lock); rcu_read_lock(); open_bucket_for_each_online_device(c, ob, ptr, ca) - bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr)); + bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), true); rcu_read_unlock(); mutex_unlock(&ob->lock); } @@ -317,6 +317,7 @@ void bch_gc(struct cache_set *c) { struct cache *ca; struct bucket *g; + struct bucket_mark new; u64 start_time = local_clock(); unsigned i; int cpu; @@ -385,8 +386,13 @@ void bch_gc(struct cache_set *c) /* Clear bucket marks: */ for_each_cache(ca, c, i) 
for_each_bucket(g, ca) { - g->oldest_gen = ca->bucket_gens[g - ca->buckets]; - atomic_set((atomic_t *) &g->mark.counter, 0); + bucket_cmpxchg(g, new, ({ + new.owned_by_allocator = 0; + new.is_metadata = 0; + new.cached_sectors = 0; + new.dirty_sectors = 0; + })); + ca->oldest_gens[g - ca->buckets] = new.gen; } /* Walk allocator's references: */ diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index 4feaf02d8409..8b5c5b43d0f8 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -146,7 +146,7 @@ found: -c->sb.btree_node_size, true, b ? gc_pos_btree_node(b) : gc_pos_btree_root(id), - &tmp); + &tmp, 0); /* * Don't apply tmp - pending deletes aren't tracked in * cache_set_stats: @@ -215,7 +215,7 @@ static void bch_btree_node_free_ondisk(struct cache_set *c, bch_mark_key(c, bkey_i_to_s_c(&pending->key), -c->sb.btree_node_size, true, gc_phase(GC_PHASE_PENDING_DELETE), - &stats); + &stats, 0); /* * Don't apply stats - pending deletes aren't tracked in * cache_set_stats: @@ -375,7 +375,7 @@ static void bch_btree_set_root_inmem(struct cache_set *c, struct btree *b, bch_mark_key(c, bkey_i_to_s_c(&b->key), c->sb.btree_node_size, true, gc_pos_btree_root(b->btree_id), - &stats); + &stats, 0); if (old) bch_btree_node_free_index(c, NULL, old->btree_id, @@ -636,7 +636,7 @@ static void bch_insert_fixup_btree_ptr(struct btree_iter *iter, if (bkey_extent_is_data(&insert->k)) bch_mark_key(c, bkey_i_to_s_c(insert), c->sb.btree_node_size, true, - gc_pos_btree_node(b), &stats); + gc_pos_btree_node(b), &stats, 0); while ((k = bch_btree_node_iter_peek_all(node_iter, b)) && !btree_iter_pos_cmp_packed(b, &insert->k.p, k, false)) diff --git a/drivers/md/bcache/buckets.c b/drivers/md/bcache/buckets.c index 254349450f13..90dfa03b0a7a 100644 --- a/drivers/md/bcache/buckets.c +++ b/drivers/md/bcache/buckets.c @@ -101,6 +101,27 @@ static void bch_cache_set_stats_verify(struct cache_set *c) {} #endif +void bch_bucket_seq_cleanup(struct cache_set *c) +{ + u16 last_seq_ondisk = c->journal.last_seq_ondisk; + struct cache *ca; + struct bucket *g; + struct bucket_mark m; + unsigned i; + + for_each_cache(ca, c, i) + for_each_bucket(g, ca) { + bucket_cmpxchg(g, m, ({ + if (!m.wait_on_journal || + ((s16) last_seq_ondisk - + (s16) m.journal_seq < 0)) + break; + + m.wait_on_journal = 0; + })); + } +} + #define bucket_stats_add(_acc, _stats) \ do { \ typeof(_acc) _a = (_acc), _s = (_stats); \ @@ -268,16 +289,23 @@ static void bucket_stats_update(struct cache *ca, bch_wake_allocator(ca); } -static struct bucket_mark bch_bucket_mark_set(struct cache *ca, - struct bucket *g, struct bucket_mark new, - bool may_make_unavailable) +void bch_invalidate_bucket(struct cache *ca, struct bucket *g) { struct bucket_stats_cache_set stats = { 0 }; - struct bucket_mark old; + struct bucket_mark old, new; - old.counter = xchg(&g->mark.counter, new.counter); + old = bucket_cmpxchg(g, new, ({ + new.owned_by_allocator = 1; + new.is_metadata = 0; + new.cached_sectors = 0; + new.dirty_sectors = 0; + new.copygc = 0; + new.gen++; + })); - bucket_stats_update(ca, old, new, may_make_unavailable, &stats); + BUG_ON(old.dirty_sectors); + + bucket_stats_update(ca, old, new, true, &stats); /* * Ick: @@ -293,55 +321,51 @@ static struct bucket_mark bch_bucket_mark_set(struct cache *ca, stats.s[S_UNCOMPRESSED][S_CACHED] = 0; BUG_ON(!bch_is_zero(&stats, sizeof(stats))); - return old; + if (!old.owned_by_allocator && old.cached_sectors) + trace_bcache_invalidate(ca, g - ca->buckets, + 
old.cached_sectors); } -#define bucket_cmpxchg(g, old, new, \ - may_make_unavailable, \ - cache_set_stats, expr) \ -do { \ - u32 _v = READ_ONCE((g)->mark.counter); \ - \ - do { \ - new.counter = old.counter = _v; \ - expr; \ - } while ((_v = cmpxchg(&(g)->mark.counter, \ - old.counter, \ - new.counter)) != old.counter); \ - bucket_stats_update(ca, old, new, \ - may_make_unavailable, \ - cache_set_stats); \ -} while (0) - void bch_mark_free_bucket(struct cache *ca, struct bucket *g) { - bch_bucket_mark_set(ca, g, - (struct bucket_mark) { .counter = 0 }, - false); + struct bucket_stats_cache_set stats = { 0 }; + struct bucket_mark old, new; + + old = bucket_cmpxchg(g, new, ({ + new.owned_by_allocator = 0; + new.is_metadata = 0; + new.cached_sectors = 0; + new.dirty_sectors = 0; + })); + + bucket_stats_update(ca, old, new, false, &stats); } -void bch_mark_alloc_bucket(struct cache *ca, struct bucket *g) +void bch_mark_alloc_bucket(struct cache *ca, struct bucket *g, + bool owned_by_allocator) { - struct bucket_mark old = bch_bucket_mark_set(ca, g, - (struct bucket_mark) { .owned_by_allocator = 1 }, - true); + struct bucket_stats_cache_set stats = { 0 }; + struct bucket_mark old, new; - BUG_ON(old.dirty_sectors); + old = bucket_cmpxchg(g, new, new.owned_by_allocator = owned_by_allocator); - if (!old.owned_by_allocator && old.cached_sectors) - trace_bcache_invalidate(ca, g - ca->buckets, - old.cached_sectors); + bucket_stats_update(ca, old, new, true, &stats); } void bch_mark_metadata_bucket(struct cache *ca, struct bucket *g, bool may_make_unavailable) { - struct bucket_mark old = bch_bucket_mark_set(ca, g, - (struct bucket_mark) { .is_metadata = 1 }, - may_make_unavailable); + struct bucket_stats_cache_set stats = { 0 }; + struct bucket_mark old, new; + + old = bucket_cmpxchg(g, new, ({ + new.is_metadata = 1; + })); BUG_ON(old.cached_sectors); BUG_ON(old.dirty_sectors); + + bucket_stats_update(ca, old, new, may_make_unavailable, &stats); } #define saturated_add(ca, dst, src, max) \ @@ -398,12 +422,12 @@ static void bch_mark_pointer(struct cache_set *c, s64 sectors, enum s_alloc type, bool may_make_unavailable, struct bucket_stats_cache_set *stats, - bool is_gc, struct gc_pos gc_pos) + bool gc_will_visit, u64 journal_seq) { struct bucket_mark old, new; unsigned saturated; struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr); - u32 v = READ_ONCE(g->mark.counter); + u64 v = READ_ONCE(g->_mark.counter); unsigned old_sectors, new_sectors; int disk_sectors, compressed_sectors; @@ -420,36 +444,28 @@ static void bch_mark_pointer(struct cache_set *c, compressed_sectors = -__compressed_sectors(crc, old_sectors) + __compressed_sectors(crc, new_sectors); + if (gc_will_visit) { + if (journal_seq) + bucket_cmpxchg(g, new, new.journal_seq = journal_seq); + + goto out; + } + do { new.counter = old.counter = v; saturated = 0; - /* - * cmpxchg() only implies a full barrier on success, not - * failure, so we need a read barrier on all iterations - - * between reading the mark and checking pointer validity/gc - * status - */ - smp_rmb(); /* * Check this after reading bucket mark to guard against * the allocator invalidating a bucket after we've already * checked the gen */ - if (ptr_stale(ca, ptr)) { + if (gen_after(old.gen, ptr->gen)) { EBUG_ON(type != S_CACHED && test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)); return; } - /* - * Check this after reading bucket mark to guard against - * GC starting between when we check gc_cur_key and when - * the GC zeroes out marks - */ - if (!is_gc && gc_will_visit(c, 
gc_pos)) - goto out; - EBUG_ON(type != S_CACHED && !may_make_unavailable && is_available_bucket(old) && @@ -472,11 +488,17 @@ static void bch_mark_pointer(struct cache_set *c, GC_MAX_SECTORS_USED); if (!new.dirty_sectors && - !new.cached_sectors) + !new.cached_sectors) { new.is_metadata = false; - else + + if (journal_seq) { + new.wait_on_journal = true; + new.journal_seq = journal_seq; + } + } else { new.is_metadata = (type == S_META); - } while ((v = cmpxchg(&g->mark.counter, + } + } while ((v = cmpxchg(&g->_mark.counter, old.counter, new.counter)) != old.counter); @@ -500,7 +522,7 @@ static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e, s64 sectors, bool metadata, bool may_make_unavailable, struct bucket_stats_cache_set *stats, - bool is_gc, struct gc_pos gc_pos) + bool gc_will_visit, u64 journal_seq) { const struct bch_extent_ptr *ptr; const union bch_extent_crc *crc; @@ -519,7 +541,7 @@ static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e, bch_mark_pointer(c, e, ca, crc, ptr, sectors, dirty ? type : S_CACHED, may_make_unavailable, - stats, is_gc, gc_pos); + stats, gc_will_visit, journal_seq); } rcu_read_unlock(); } @@ -528,13 +550,14 @@ static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k, s64 sectors, bool metadata, bool may_make_unavailable, struct bucket_stats_cache_set *stats, - bool is_gc, struct gc_pos gc_pos) + bool gc_will_visit, u64 journal_seq) { switch (k.k->type) { case BCH_EXTENT: case BCH_EXTENT_CACHED: bch_mark_extent(c, bkey_s_c_to_extent(k), sectors, metadata, - may_make_unavailable, stats, is_gc, gc_pos); + may_make_unavailable, stats, + gc_will_visit, journal_seq); break; case BCH_RESERVATION: stats->persistent_reserved += sectors; @@ -546,7 +569,7 @@ void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, s64 sectors, bool metadata, struct bucket_stats_cache_set *stats) { - __bch_mark_key(c, k, sectors, metadata, true, stats, true, GC_POS_MIN); + __bch_mark_key(c, k, sectors, metadata, true, stats, false, 0); } void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, @@ -563,28 +586,43 @@ void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, void bch_mark_key(struct cache_set *c, struct bkey_s_c k, s64 sectors, bool metadata, struct gc_pos gc_pos, - struct bucket_stats_cache_set *stats) + struct bucket_stats_cache_set *stats, u64 journal_seq) { + /* + * synchronization w.r.t. GC: + * + * Normally, bucket sector counts/marks are updated on the fly, as + * references are added/removed from the btree, the lists of buckets the + * allocator owns, other metadata buckets, etc. + * + * When GC is in progress and going to mark this reference, we do _not_ + * mark this reference here, to avoid double counting - GC will count it + * when it gets to it. + * + * To know whether we should mark a given reference (GC either isn't + * running, or has already marked references at this position) we + * construct a total order for everything GC walks. Then, we can simply + * compare the position of the reference we're marking - @gc_pos - with + * GC's current position. If GC is going to mark this reference, GC's + * current position will be less than @gc_pos; if GC's current position + * is greater than @gc_pos GC has either already walked this position, + * or isn't running. 
+ * + * To avoid racing with GC's position changing, we have to deal with + * - GC's position being set to GC_POS_MIN when GC starts: + * bucket_stats_lock guards against this + * - GC's position overtaking @gc_pos: we guard against this with + * whatever lock protects the data structure the reference lives in + * (e.g. the btree node lock, or the relevant allocator lock). + */ lg_local_lock(&c->bucket_stats_lock); - __bch_mark_key(c, k, sectors, metadata, false, stats, false, gc_pos); + __bch_mark_key(c, k, sectors, metadata, false, stats, + gc_will_visit(c, gc_pos), journal_seq); bch_cache_set_stats_verify(c); lg_local_unlock(&c->bucket_stats_lock); } -void bch_unmark_open_bucket(struct cache *ca, struct bucket *g) -{ - struct bucket_stats_cache_set stats = { 0 }; - struct bucket_mark old, new; - - bucket_cmpxchg(g, old, new, false, NULL, ({ - new.owned_by_allocator = 0; - })); - - /* owned_by_allocator buckets aren't tracked in cache_set_stats: */ - BUG_ON(!bch_is_zero(&stats, sizeof(stats))); -} - static u64 __recalc_sectors_available(struct cache_set *c) { return c->capacity - cache_set_sectors_used(c); diff --git a/drivers/md/bcache/buckets.h b/drivers/md/bcache/buckets.h index 277085e152eb..35100eba351e 100644 --- a/drivers/md/bcache/buckets.h +++ b/drivers/md/bcache/buckets.h @@ -10,10 +10,24 @@ #include "buckets_types.h" #include "super.h" -#define for_each_bucket(b, ca) \ - for (b = (ca)->buckets + (ca)->mi.first_bucket; \ +#define for_each_bucket(b, ca) \ + for (b = (ca)->buckets + (ca)->mi.first_bucket; \ b < (ca)->buckets + (ca)->mi.nbuckets; b++) +#define bucket_cmpxchg(g, new, expr) \ +({ \ + u64 _v = READ_ONCE((g)->_mark.counter); \ + struct bucket_mark _old; \ + \ + do { \ + (new).counter = _old.counter = _v; \ + expr; \ + } while ((_v = cmpxchg(&(g)->_mark.counter, \ + _old.counter, \ + (new).counter)) != _old.counter);\ + _old; \ +}) + /* * bucket_gc_gen() returns the difference between the bucket's current gen and * the oldest gen of any pointer into that bucket in the btree. 
@@ -22,7 +36,7 @@ static inline u8 bucket_gc_gen(struct cache *ca, struct bucket *g) { unsigned long r = g - ca->buckets; - return ca->bucket_gens[r] - ca->buckets[r].oldest_gen; + return g->mark.gen - ca->oldest_gens[r]; } static inline struct cache *PTR_CACHE(const struct cache_set *c, @@ -63,13 +77,7 @@ static inline size_t PTR_BUCKET_NR_TRACE(const struct cache_set *c, return bucket; } -static inline u8 PTR_BUCKET_GEN(const struct cache *ca, - const struct bch_extent_ptr *ptr) -{ - return ca->bucket_gens[PTR_BUCKET_NR(ca, ptr)]; -} - -static inline struct bucket *PTR_BUCKET(struct cache *ca, +static inline struct bucket *PTR_BUCKET(const struct cache *ca, const struct bch_extent_ptr *ptr) { return ca->buckets + PTR_BUCKET_NR(ca, ptr); @@ -100,7 +108,7 @@ static inline u8 gen_after(u8 a, u8 b) static inline u8 ptr_stale(const struct cache *ca, const struct bch_extent_ptr *ptr) { - return gen_after(PTR_BUCKET_GEN(ca, ptr), ptr->gen); + return gen_after(PTR_BUCKET(ca, ptr)->mark.gen, ptr->gen); } /* bucket heaps */ @@ -231,16 +239,18 @@ static inline bool is_available_bucket(struct bucket_mark mark) !mark.dirty_sectors); } +void bch_bucket_seq_cleanup(struct cache_set *); + +void bch_invalidate_bucket(struct cache *, struct bucket *); void bch_mark_free_bucket(struct cache *, struct bucket *); -void bch_mark_alloc_bucket(struct cache *, struct bucket *); +void bch_mark_alloc_bucket(struct cache *, struct bucket *, bool); void bch_mark_metadata_bucket(struct cache *, struct bucket *, bool); -void bch_unmark_open_bucket(struct cache *, struct bucket *); void __bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool, struct bucket_stats_cache_set *); void bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool); void bch_mark_key(struct cache_set *, struct bkey_s_c, s64, bool, - struct gc_pos, struct bucket_stats_cache_set *); + struct gc_pos, struct bucket_stats_cache_set *, u64); void bch_recalc_sectors_available(struct cache_set *); diff --git a/drivers/md/bcache/buckets_types.h b/drivers/md/bcache/buckets_types.h index 7712ff2b6d1a..90bb09c6db35 100644 --- a/drivers/md/bcache/buckets_types.h +++ b/drivers/md/bcache/buckets_types.h @@ -4,14 +4,27 @@ struct bucket_mark { union { struct { - u32 counter; + u64 counter; }; struct { + u8 gen; + + /* generation copygc is going to move this bucket into */ + unsigned copygc:1; + unsigned wait_on_journal:1; + unsigned owned_by_allocator:1; - unsigned cached_sectors:15; unsigned is_metadata:1; - unsigned dirty_sectors:15; + + u16 cached_sectors; + u16 dirty_sectors; + + /* + * low bits of journal sequence number when this bucket was most + * recently modified: + */ + u16 journal_seq; }; }; }; @@ -24,12 +37,11 @@ struct bucket { }; u16 prio[2]; }; - struct bucket_mark mark; - /* Most out of date gen in the btree */ - u8 oldest_gen; - /* generation copygc is going to move this bucket into */ - u8 copygc_gen; + union { + struct bucket_mark _mark; + const struct bucket_mark mark; + }; }; struct bucket_stats_cache { diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 288603bb5e3c..a0d8fa425930 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -511,8 +511,9 @@ err: cache_set_bug(c, "%s btree pointer %s: bucket %zi prio %i " "gen %i last_gc %i mark %08x", err, buf, PTR_BUCKET_NR(ca, ptr), - g->read_prio, PTR_BUCKET_GEN(ca, ptr), - g->oldest_gen, g->mark.counter); + g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, + ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], + (unsigned) g->mark.counter); 
rcu_read_unlock(); } @@ -862,61 +863,65 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct cache_set *c, return nr; } -static void bch_add_sectors(struct btree_iter *iter, struct bkey_s_c k, - u64 offset, s64 sectors, - struct bucket_stats_cache_set *stats) +struct extent_insert_state { + struct btree_insert *trans; + struct btree_insert_entry *insert; + struct bpos committed; + struct bucket_stats_cache_set stats; + + /* for deleting: */ + struct bkey_i whiteout; + bool do_journal; + bool deleting; +}; + +static void bch_add_sectors(struct extent_insert_state *s, + struct bkey_s_c k, u64 offset, s64 sectors) { - struct cache_set *c = iter->c; - struct btree *b = iter->nodes[0]; + struct cache_set *c = s->trans->c; + struct btree *b = s->insert->iter->nodes[0]; - EBUG_ON(iter->level); EBUG_ON(bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0); if (!sectors) return; - bch_mark_key(c, k, sectors, false, gc_pos_btree_node(b), stats); + bch_mark_key(c, k, sectors, false, gc_pos_btree_node(b), + &s->stats, s->trans->journal_res.seq); if (bkey_extent_is_data(k.k) && !bkey_extent_is_cached(k.k)) bcache_dev_sectors_dirty_add(c, k.k->p.inode, offset, sectors); } -static void bch_subtract_sectors(struct btree_iter *iter, struct bkey_s_c k, - u64 offset, s64 sectors, - struct bucket_stats_cache_set *stats) +static void bch_subtract_sectors(struct extent_insert_state *s, + struct bkey_s_c k, u64 offset, s64 sectors) { - bch_add_sectors(iter, k, offset, -sectors, stats); + bch_add_sectors(s, k, offset, -sectors); } /* These wrappers subtract exactly the sectors that we're removing from @k */ -static void bch_cut_subtract_back(struct btree_iter *iter, - struct bpos where, struct bkey_s k, - struct bucket_stats_cache_set *stats) +static void bch_cut_subtract_back(struct extent_insert_state *s, + struct bpos where, struct bkey_s k) { - bch_subtract_sectors(iter, k.s_c, where.offset, - k.k->p.offset - where.offset, - stats); + bch_subtract_sectors(s, k.s_c, where.offset, + k.k->p.offset - where.offset); bch_cut_back(where, k.k); } -static void bch_cut_subtract_front(struct btree_iter *iter, - struct bpos where, struct bkey_s k, - struct bucket_stats_cache_set *stats) +static void bch_cut_subtract_front(struct extent_insert_state *s, + struct bpos where, struct bkey_s k) { - bch_subtract_sectors(iter, k.s_c, bkey_start_offset(k.k), - where.offset - bkey_start_offset(k.k), - stats); + bch_subtract_sectors(s, k.s_c, bkey_start_offset(k.k), + where.offset - bkey_start_offset(k.k)); __bch_cut_front(where, k); } -static void bch_drop_subtract(struct btree_iter *iter, struct bkey_s k, - struct bucket_stats_cache_set *stats) +static void bch_drop_subtract(struct extent_insert_state *s, struct bkey_s k) { if (k.k->size) - bch_subtract_sectors(iter, k.s_c, - bkey_start_offset(k.k), k.k->size, - stats); + bch_subtract_sectors(s, k.s_c, + bkey_start_offset(k.k), k.k->size); k.k->size = 0; __set_bkey_deleted(k.k); } @@ -1041,18 +1046,6 @@ static bool bch_extent_merge_inline(struct cache_set *, #define MAX_LOCK_HOLD_TIME (5 * NSEC_PER_MSEC) -struct extent_insert_state { - struct btree_insert *trans; - struct btree_insert_entry *insert; - struct bpos committed; - struct bucket_stats_cache_set stats; - - /* for deleting: */ - struct bkey_i whiteout; - bool do_journal; - bool deleting; -}; - static enum btree_insert_ret extent_insert_should_stop(struct extent_insert_state *s) { @@ -1146,12 +1139,12 @@ static void extent_insert_committed(struct extent_insert_state *s) bkey_cmp(s->committed, insert->k.p) && 
bkey_extent_is_compressed(c, bkey_i_to_s_c(insert))) { /* XXX: possibly need to increase our reservation? */ - bch_cut_subtract_back(iter, s->committed, - bkey_i_to_s(&split.k), &s->stats); + bch_cut_subtract_back(s, s->committed, + bkey_i_to_s(&split.k)); bch_cut_front(s->committed, insert); - bch_add_sectors(iter, bkey_i_to_s_c(insert), + bch_add_sectors(s, bkey_i_to_s_c(insert), bkey_start_offset(&insert->k), - insert->k.size, &s->stats); + insert->k.size); } else { bch_cut_back(s->committed, &split.k.k); bch_cut_front(s->committed, insert); @@ -1197,8 +1190,7 @@ __extent_insert_advance_pos(struct extent_insert_state *s, break; case BTREE_HOOK_NO_INSERT: extent_insert_committed(s); - bch_cut_subtract_front(s->insert->iter, next_pos, - bkey_i_to_s(s->insert->k), &s->stats); + bch_cut_subtract_front(s, next_pos, bkey_i_to_s(s->insert->k)); bch_btree_iter_set_pos_same_leaf(s->insert->iter, next_pos); break; @@ -1296,16 +1288,14 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, switch (overlap) { case BCH_EXTENT_OVERLAP_FRONT: /* insert overlaps with start of k: */ - bch_cut_subtract_front(iter, insert->k.p, k, &s->stats); + bch_cut_subtract_front(s, insert->k.p, k); BUG_ON(bkey_deleted(k.k)); extent_save(b, node_iter, _k, k.k); break; case BCH_EXTENT_OVERLAP_BACK: /* insert overlaps with end of k: */ - bch_cut_subtract_back(iter, - bkey_start_pos(&insert->k), - k, &s->stats); + bch_cut_subtract_back(s, bkey_start_pos(&insert->k), k); BUG_ON(bkey_deleted(k.k)); extent_save(b, node_iter, _k, k.k); @@ -1327,7 +1317,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, btree_keys_account_key_drop(&b->nr, t - b->set, _k); - bch_drop_subtract(iter, k, &s->stats); + bch_drop_subtract(s, k); k.k->p = bkey_start_pos(&insert->k); if (!__extent_save(b, node_iter, _k, k.k)) { /* @@ -1381,13 +1371,13 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, bch_cut_back(bkey_start_pos(&insert->k), &split.k.k); BUG_ON(bkey_deleted(&split.k.k)); - bch_cut_subtract_front(iter, insert->k.p, k, &s->stats); + bch_cut_subtract_front(s, insert->k.p, k); BUG_ON(bkey_deleted(k.k)); extent_save(b, node_iter, _k, k.k); - bch_add_sectors(iter, bkey_i_to_s_c(&split.k), + bch_add_sectors(s, bkey_i_to_s_c(&split.k), bkey_start_offset(&split.k.k), - split.k.k.size, &s->stats); + split.k.k.size); extent_bset_insert(c, iter, &split.k); break; } @@ -1452,9 +1442,8 @@ bch_delete_fixup_extent(struct extent_insert_state *s) if (overlap == BCH_EXTENT_OVERLAP_ALL) { btree_keys_account_key_drop(&b->nr, t - b->set, _k); - bch_subtract_sectors(iter, k.s_c, - bkey_start_offset(k.k), k.k->size, - &s->stats); + bch_subtract_sectors(s, k.s_c, + bkey_start_offset(k.k), k.k->size); _k->type = KEY_TYPE_DISCARD; reserve_whiteout(b, t, _k); } else if (k.k->needs_whiteout || @@ -1583,9 +1572,9 @@ bch_insert_fixup_extent(struct btree_insert *trans, EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k))); if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) - bch_add_sectors(iter, bkey_i_to_s_c(insert->k), + bch_add_sectors(&s, bkey_i_to_s_c(insert->k), bkey_start_offset(&insert->k->k), - insert->k->k.size, &s.stats); + insert->k->k.size); while (bkey_cmp(s.committed, insert->k->k.p) < 0 && (ret = extent_insert_should_stop(&s)) == BTREE_INSERT_OK && @@ -1652,9 +1641,9 @@ stop: */ if (insert->k->k.size && !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) - bch_subtract_sectors(iter, bkey_i_to_s_c(insert->k), + bch_subtract_sectors(&s, bkey_i_to_s_c(insert->k), 
bkey_start_offset(&insert->k->k), - insert->k->k.size, &s.stats); + insert->k->k.size); bch_cache_set_stats_apply(c, &s.stats, trans->disk_res, gc_pos_btree_node(b)); @@ -1861,8 +1850,9 @@ bad_ptr: cache_set_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i " "gen %i last_gc %i mark 0x%08x", buf, PTR_BUCKET_NR(ca, ptr), - g->read_prio, PTR_BUCKET_GEN(ca, ptr), - g->oldest_gen, g->mark.counter); + g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, + ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], + (unsigned) g->mark.counter); cache_member_info_put(); return; } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 28a79a1a0e3f..3a30b102c093 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -1077,6 +1077,11 @@ static enum { cancel_delayed_work(&j->write_work); spin_unlock(&j->lock); + if (c->bucket_journal_seq > 1 << 14) { + c->bucket_journal_seq = 0; + bch_bucket_seq_cleanup(c); + } + /* ugh - might be called from __journal_res_get() under wait_event() */ __set_current_state(TASK_RUNNING); bch_journal_buf_put(j, old.idx, need_write_just_set); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 3da05da54c98..917fd6ff9e00 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -21,24 +21,21 @@ /* Moving GC - IO loop */ -static bool moving_pred(struct cache *ca, struct bkey_s_c k) +static const struct bch_extent_ptr *moving_pred(struct cache *ca, + struct bkey_s_c k) { - struct cache_set *c = ca->set; const struct bch_extent_ptr *ptr; - bool ret = false; if (bkey_extent_is_data(k.k)) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - rcu_read_lock(); extent_for_each_ptr(e, ptr) - if (PTR_CACHE(c, ptr) == ca && - PTR_BUCKET(ca, ptr)->copygc_gen) - ret = true; - rcu_read_unlock(); + if ((ca->sb.nr_this_dev == ptr->dev) && + PTR_BUCKET(ca, ptr)->mark.copygc) + return ptr; } - return ret; + return NULL; } static int issue_moving_gc_move(struct cache *ca, @@ -49,14 +46,10 @@ static int issue_moving_gc_move(struct cache *ca, const struct bch_extent_ptr *ptr; int ret; - extent_for_each_ptr(bkey_s_c_to_extent(k), ptr) - if ((ca->sb.nr_this_dev == ptr->dev) && - PTR_BUCKET(ca, ptr)->copygc_gen) - goto found; + ptr = moving_pred(ca, k); + if (!ptr) /* We raced - bucket's been reused */ + return 0; - /* We raced - bucket's been reused */ - return 0; -found: ret = bch_data_move(c, ctxt, &ca->copygc_write_point, k, ptr); if (!ret) trace_bcache_gc_copy(k.k); @@ -116,7 +109,7 @@ next: /* don't check this if we bailed out early: */ for_each_bucket(g, ca) - if (g->copygc_gen && bucket_sectors_used(g)) { + if (g->mark.copygc && bucket_sectors_used(g)) { sectors_not_moved += bucket_sectors_used(g); buckets_not_moved++; } @@ -149,6 +142,7 @@ static void bch_moving_gc(struct cache *ca) { struct cache_set *c = ca->set; struct bucket *g; + struct bucket_mark new; u64 sectors_to_move; size_t buckets_to_move, buckets_unused = 0; struct bucket_heap_entry e; @@ -182,7 +176,7 @@ static void bch_moving_gc(struct cache *ca) /* * We need bucket marks to be up to date, so gc can't be recalculating * them, and we don't want the allocator invalidating a bucket after - * we've decided to evacuate it but before we set copygc_gen: + * we've decided to evacuate it but before we set copygc: */ down_read(&c->gc_lock); mutex_lock(&ca->heap_lock); @@ -190,7 +184,7 @@ static void bch_moving_gc(struct cache *ca) ca->heap.used = 0; for_each_bucket(g, ca) { - g->copygc_gen = 0; + bucket_cmpxchg(g, new, new.copygc = 0); if 
(bucket_unused(g)) { buckets_unused++; @@ -219,7 +213,7 @@ static void bch_moving_gc(struct cache *ca) } for (i = 0; i < ca->heap.used; i++) - ca->heap.data[i].g->copygc_gen = 1; + bucket_cmpxchg(ca->heap.data[i].g, new, new.copygc = 1); buckets_to_move = ca->heap.used; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1aba0a698755..93a97114a10e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1341,9 +1341,10 @@ static const char *run_cache_set(struct cache_set *c) */ bch_journal_start(c); + err = "error starting allocator thread"; for_each_cache(ca, c, i) if (ca->mi.state == CACHE_ACTIVE && - (err = bch_cache_allocator_start_once(ca))) { + bch_cache_allocator_start(ca)) { percpu_ref_put(&ca->ref); goto err; } @@ -1406,9 +1407,10 @@ static const char *run_cache_set(struct cache_set *c) bch_journal_start(c); bch_journal_set_replay_done(&c->journal); + err = "error starting allocator thread"; for_each_cache(ca, c, i) if (ca->mi.state == CACHE_ACTIVE && - (err = bch_cache_allocator_start_once(ca))) { + bch_cache_allocator_start(ca)) { percpu_ref_put(&ca->ref); goto err; } @@ -1709,7 +1711,7 @@ static void bch_cache_free_work(struct work_struct *work) kfree(ca->bio_prio); kfree(ca->journal.bio); vfree(ca->buckets); - vfree(ca->bucket_gens); + vfree(ca->oldest_gens); free_heap(&ca->heap); free_fifo(&ca->free_inc); @@ -1979,7 +1981,7 @@ static const char *cache_alloc(struct bcache_superblock *sb, !init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) || !init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) || !init_heap(&ca->heap, heap_size, GFP_KERNEL) || - !(ca->bucket_gens = vzalloc(sizeof(u8) * + !(ca->oldest_gens = vzalloc(sizeof(u8) * ca->mi.nbuckets)) || !(ca->buckets = vzalloc(sizeof(struct bucket) * ca->mi.nbuckets)) || @@ -2211,8 +2213,8 @@ have_slot: bch_notify_cache_added(ca); if (ca->mi.state == CACHE_ACTIVE) { - err = bch_cache_allocator_start_once(ca); - if (err) + err = "error starting allocator thread"; + if (bch_cache_allocator_start(ca)) goto err_put; err = __bch_cache_read_write(ca); |
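
One detail worth calling out: the mark stores only the low 16 bits of the journal sequence number, so both bch_find_empty_buckets() and bch_bucket_seq_cleanup() decide "has this sequence reached disk?" with a signed 16-bit subtraction, which stays correct across wraparound as long as the two values are less than 2^15 apart. The journal write path (journal.c hunk above) resets c->bucket_journal_seq and runs bch_bucket_seq_cleanup() once the counter passes 1 << 14, which presumably keeps the stored sequences well inside that window. A small standalone check of the idiom, using the same form of comparison as the patch:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Wraparound-safe "has this sequence number reached disk yet?" check, in the
 * same form as the (s16) comparisons in bch_find_empty_buckets() and
 * bch_bucket_seq_cleanup(): valid while the two 16-bit sequence numbers are
 * less than 2^15 apart.
 */
static bool seq_ondisk(uint16_t last_seq_ondisk, uint16_t bucket_seq)
{
	return (int16_t) last_seq_ondisk - (int16_t) bucket_seq >= 0;
}

int main(void)
{
	printf("%d\n", seq_ondisk(100, 90));	/* 1: bucket seq already flushed */
	printf("%d\n", seq_ondisk(100, 110));	/* 0: bucket touched after last flush */
	printf("%d\n", seq_ondisk(5, 65530));	/* 1: still correct across wraparound */
	return 0;
}
```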