Diffstat (limited to 'libbcache/alloc.c')
-rw-r--r--	libbcache/alloc.c	319
1 file changed, 163 insertions(+), 156 deletions(-)
diff --git a/libbcache/alloc.c b/libbcache/alloc.c
index a0f8d64f..2392c688 100644
--- a/libbcache/alloc.c
+++ b/libbcache/alloc.c
@@ -39,10 +39,10 @@
  * time around, and we garbage collect or rewrite the priorities sooner than we
  * would have otherwise.
  *
- * bch_bucket_alloc() allocates a single bucket from a specific cache.
+ * bch_bucket_alloc() allocates a single bucket from a specific device.
  *
- * bch_bucket_alloc_set() allocates one or more buckets from different caches
- * out of a cache set.
+ * bch_bucket_alloc_set() allocates one or more buckets from different devices
+ * in a given filesystem.
  *
  * invalidate_buckets() drives all the processes described above. It's called
  * from bch_bucket_alloc() and a few other places that need to make sure free
@@ -73,18 +73,19 @@
 #include <linux/rcupdate.h>
 #include <trace/events/bcache.h>
 
-static void __bch_bucket_free(struct cache *, struct bucket *);
+static void __bch_bucket_free(struct bch_dev *, struct bucket *);
+static void bch_recalc_min_prio(struct bch_dev *, int);
 
 /* Allocation groups: */
 
-void bch_dev_group_remove(struct cache_group *grp, struct cache *ca)
+void bch_dev_group_remove(struct dev_group *grp, struct bch_dev *ca)
 {
 	unsigned i;
 
 	spin_lock(&grp->lock);
 
 	for (i = 0; i < grp->nr; i++)
-		if (rcu_access_pointer(grp->d[i].dev) == ca) {
+		if (grp->d[i].dev == ca) {
 			grp->nr--;
 			memmove(&grp->d[i],
 				&grp->d[i + 1],
@@ -95,18 +96,18 @@ void bch_dev_group_remove(struct cache_group *grp, struct cache *ca)
 	spin_unlock(&grp->lock);
 }
 
-void bch_dev_group_add(struct cache_group *grp, struct cache *ca)
+void bch_dev_group_add(struct dev_group *grp, struct bch_dev *ca)
 {
 	unsigned i;
 
 	spin_lock(&grp->lock);
 
 	for (i = 0; i < grp->nr; i++)
-		if (rcu_access_pointer(grp->d[i].dev) == ca)
+		if (grp->d[i].dev == ca)
 			goto out;
 
 	BUG_ON(grp->nr >= BCH_SB_MEMBERS_MAX);
 
-	rcu_assign_pointer(grp->d[grp->nr++].dev, ca);
+	grp->d[grp->nr++].dev = ca;
 out:
 	spin_unlock(&grp->lock);
 }
@@ -115,10 +116,10 @@ out:
 
 static void pd_controllers_update(struct work_struct *work)
 {
-	struct cache_set *c = container_of(to_delayed_work(work),
-					   struct cache_set,
+	struct bch_fs *c = container_of(to_delayed_work(work),
+					struct bch_fs,
 					   pd_controllers_update);
-	struct cache *ca;
+	struct bch_dev *ca;
 	unsigned i, iter;
 
 	/* All units are in bytes */
@@ -137,14 +138,15 @@ static void pd_controllers_update(struct work_struct *work)
 					     faster_tiers_dirty,
 					     -1);
 
-		group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) {
+		spin_lock(&c->tiers[i].devs.lock);
+		group_for_each_dev(ca, &c->tiers[i].devs, iter) {
 			struct bch_dev_usage stats = bch_dev_usage_read(ca);
 			unsigned bucket_bits = ca->bucket_bits + 9;
 
 			u64 size = (ca->mi.nbuckets -
 				    ca->mi.first_bucket) << bucket_bits;
 			u64 dirty = stats.buckets_dirty << bucket_bits;
-			u64 free = __buckets_free_cache(ca, stats) << bucket_bits;
+			u64 free = __dev_buckets_free(ca, stats) << bucket_bits;
 			/*
 			 * Bytes of internal fragmentation, which can be
 			 * reclaimed by copy GC
@@ -172,6 +174,7 @@ static void pd_controllers_update(struct work_struct *work)
 
 			copygc_can_free += fragmented;
 		}
+		spin_unlock(&c->tiers[i].devs.lock);
 	}
 
 	rcu_read_unlock();
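As an aside, the hunks above replace RCU-protected member pointers with plain pointers that are only read and written under the group's spinlock. The following is a minimal standalone sketch of that pattern, using invented stand-in types (a pthread mutex in place of the kernel spinlock, an opaque struct dev in place of struct bch_dev) rather than the real bcachefs structures:

/* Sketch only: invented stand-ins, not the bcachefs types. */
#include <string.h>
#include <pthread.h>

#define GROUP_MAX 64

struct dev;				/* opaque device handle */

struct dev_entry {
	struct dev	*dev;
	unsigned	weight;
};

struct dev_group_sketch {
	pthread_mutex_t	lock;		/* stands in for the kernel spinlock */
	unsigned	nr;
	struct dev_entry d[GROUP_MAX];
};

/* Membership only changes under the lock, so readers that also take the
 * lock can use plain pointers instead of rcu_access_pointer(): */
static void group_remove(struct dev_group_sketch *grp, struct dev *dev)
{
	unsigned i;

	pthread_mutex_lock(&grp->lock);
	for (i = 0; i < grp->nr; i++)
		if (grp->d[i].dev == dev) {
			grp->nr--;
			memmove(&grp->d[i], &grp->d[i + 1],
				(grp->nr - i) * sizeof(grp->d[0]));
			break;
		}
	pthread_mutex_unlock(&grp->lock);
}

static void group_add(struct dev_group_sketch *grp, struct dev *dev)
{
	unsigned i;

	pthread_mutex_lock(&grp->lock);
	for (i = 0; i < grp->nr; i++)
		if (grp->d[i].dev == dev)
			goto out;
	if (grp->nr >= GROUP_MAX)	/* the real code BUG_ON()s here */
		goto out;
	grp->d[grp->nr++].dev = dev;
out:
	pthread_mutex_unlock(&grp->lock);
}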
@@ -229,7 +232,7 @@ static void pd_controllers_update(struct work_struct *work)
  * disk.
  */
-static int prio_io(struct cache *ca, uint64_t bucket, int op)
+static int prio_io(struct bch_dev *ca, uint64_t bucket, int op)
 {
 	bio_init(ca->bio_prio);
 	bio_set_op_attrs(ca->bio_prio, op, REQ_SYNC|REQ_META);
@@ -254,9 +257,9 @@ static struct nonce prio_nonce(struct prio_set *p)
 	}};
 }
 
-static int bch_prio_write(struct cache *ca)
+static int bch_prio_write(struct bch_dev *ca)
 {
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	struct journal *j = &c->journal;
 	struct journal_res res = { 0 };
 	bool need_new_journal_entry;
@@ -370,9 +373,9 @@ static int bch_prio_write(struct cache *ca)
 	return 0;
 }
 
-int bch_prio_read(struct cache *ca)
+int bch_prio_read(struct bch_dev *ca)
 {
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	struct prio_set *p = ca->disk_buckets;
 	struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
 	struct bucket_mark new;
@@ -441,8 +444,15 @@ int bch_prio_read(struct cache *ca)
 		bucket_cmpxchg(&ca->buckets[b], new, new.gen = d->gen);
 	}
+
+	mutex_lock(&c->bucket_lock);
+	bch_recalc_min_prio(ca, READ);
+	bch_recalc_min_prio(ca, WRITE);
+	mutex_unlock(&c->bucket_lock);
+
+	ret = 0;
 fsck_err:
-	return 0;
+	return ret;
 }
 
 #define BUCKET_GC_GEN_MAX	96U
@@ -453,9 +463,9 @@ fsck_err:
  * If there aren't enough available buckets to fill up free_inc, wait until
  * there are.
  */
-static int wait_buckets_available(struct cache *ca)
+static int wait_buckets_available(struct bch_dev *ca)
 {
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	int ret = 0;
 
 	while (1) {
@@ -467,9 +477,9 @@ static int wait_buckets_available(struct cache *ca)
 		if (ca->inc_gen_needs_gc >= fifo_free(&ca->free_inc)) {
 			if (c->gc_thread) {
-				trace_bcache_gc_cannot_inc_gens(ca->set);
+				trace_bcache_gc_cannot_inc_gens(ca->fs);
 				atomic_inc(&c->kick_gc);
-				wake_up_process(ca->set->gc_thread);
+				wake_up_process(ca->fs->gc_thread);
 			}
 
 			/*
@@ -478,23 +488,23 @@
 			 * because we are actually waiting for GC to rewrite
 			 * nodes with stale pointers
 			 */
-		} else if (buckets_available_cache(ca) >=
+		} else if (dev_buckets_available(ca) >=
 			   fifo_free(&ca->free_inc))
 			break;
 
-		up_read(&ca->set->gc_lock);
+		up_read(&ca->fs->gc_lock);
 		schedule();
 		try_to_freeze();
-		down_read(&ca->set->gc_lock);
+		down_read(&ca->fs->gc_lock);
 	}
 
 	__set_current_state(TASK_RUNNING);
 	return ret;
 }
 
-static void verify_not_on_freelist(struct cache *ca, size_t bucket)
+static void verify_not_on_freelist(struct bch_dev *ca, size_t bucket)
 {
-	if (expensive_debug_checks(ca->set)) {
+	if (expensive_debug_checks(ca->fs)) {
 		size_t iter;
 		long i;
 		unsigned j;
@@ -512,14 +522,16 @@
 
 /* Bucket heap / gen */
 
-void bch_recalc_min_prio(struct cache *ca, int rw)
+void bch_recalc_min_prio(struct bch_dev *ca, int rw)
 {
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	struct prio_clock *clock = &c->prio_clock[rw];
 	struct bucket *g;
 	u16 max_delta = 1;
 	unsigned i;
 
+	lockdep_assert_held(&c->bucket_lock);
+
 	/* Determine min prio for this particular cache */
 	for_each_bucket(g, ca)
 		max_delta = max(max_delta, (u16) (clock->hand - g->prio[rw]));
@@ -532,23 +544,23 @@
 	 */
 	max_delta = 1;
 
-	for_each_cache(ca, c, i)
+	for_each_member_device(ca, c, i)
 		max_delta = max(max_delta,
 				(u16) (clock->hand - ca->min_prio[rw]));
 
 	clock->min_prio = clock->hand - max_delta;
 }
 
-static void bch_rescale_prios(struct cache_set *c, int rw)
+static void bch_rescale_prios(struct bch_fs *c, int rw)
 {
 	struct prio_clock *clock = &c->prio_clock[rw];
-	struct cache *ca;
+	struct bch_dev *ca;
 	struct bucket *g;
 	unsigned i;
 
 	trace_bcache_rescale_prios(c);
 
-	for_each_cache(ca, c, i) {
+	for_each_member_device(ca, c, i) {
 		for_each_bucket(g, ca)
 			g->prio[rw] = clock->hand -
 				(clock->hand - g->prio[rw]) / 2;
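For orientation, the priority clocks above live in wrapping 16-bit space: bch_recalc_min_prio() finds the bucket furthest behind the clock hand and records hand minus that distance as the minimum, and bch_rescale_prios() halves every bucket's distance to the hand so the u16 space never saturates. A small illustrative helper (hypothetical, simplified types, not the bcachefs functions):

#include <stdint.h>

struct toy_bucket {
	uint16_t prio;
};

/* min prio = hand minus the largest observed distance from the hand;
 * uint16_t arithmetic wraps, so this stays correct after the hand overflows */
static uint16_t recalc_min_prio_sketch(uint16_t hand,
				       const struct toy_bucket *b, unsigned nr)
{
	uint16_t max_delta = 1;
	unsigned i;

	for (i = 0; i < nr; i++) {
		uint16_t delta = (uint16_t) (hand - b[i].prio);

		if (delta > max_delta)
			max_delta = delta;
	}

	return (uint16_t) (hand - max_delta);
}

/* the rescale step: halve each bucket's distance to the hand */
static void rescale_prios_sketch(uint16_t hand, struct toy_bucket *b, unsigned nr)
{
	unsigned i;

	for (i = 0; i < nr; i++)
		b[i].prio = (uint16_t) (hand - (uint16_t) (hand - b[i].prio) / 2);
}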
@@ -561,8 +573,8 @@ static void bch_inc_clock_hand(struct io_timer *timer)
 {
 	struct prio_clock *clock = container_of(timer,
 					struct prio_clock, rescale);
-	struct cache_set *c = container_of(clock,
-				struct cache_set, prio_clock[clock->rw]);
+	struct bch_fs *c = container_of(clock,
+				struct bch_fs, prio_clock[clock->rw]);
 	u64 capacity;
 
 	mutex_lock(&c->bucket_lock);
@@ -581,7 +593,7 @@
 		return;
 
 	/*
-	 * we only increment when 0.1% of the cache_set has been read
+	 * we only increment when 0.1% of the filesystem capacity has been read
	 * or written too, this determines if it's time
	 *
	 * XXX: we shouldn't really be going off of the capacity of devices in
@@ -593,7 +605,7 @@
 	bch_io_timer_add(&c->io_clock[clock->rw], timer);
 }
 
-static void bch_prio_timer_init(struct cache_set *c, int rw)
+static void bch_prio_timer_init(struct bch_fs *c, int rw)
 {
 	struct prio_clock *clock = &c->prio_clock[rw];
 	struct io_timer *timer = &clock->rescale;
@@ -610,12 +622,12 @@
  * them on the various freelists.
  */
 
-static inline bool can_inc_bucket_gen(struct cache *ca, struct bucket *g)
+static inline bool can_inc_bucket_gen(struct bch_dev *ca, struct bucket *g)
 {
 	return bucket_gc_gen(ca, g) < BUCKET_GC_GEN_MAX;
 }
 
-static bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *g)
+static bool bch_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g)
 {
 	if (!is_available_bucket(READ_ONCE(g->mark)))
 		return false;
@@ -626,14 +638,14 @@ static bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *g)
 	return can_inc_bucket_gen(ca, g);
 }
 
-static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *g)
+static void bch_invalidate_one_bucket(struct bch_dev *ca, struct bucket *g)
 {
 	spin_lock(&ca->freelist_lock);
 
 	bch_invalidate_bucket(ca, g);
 
-	g->read_prio = ca->set->prio_clock[READ].hand;
-	g->write_prio = ca->set->prio_clock[WRITE].hand;
+	g->read_prio = ca->fs->prio_clock[READ].hand;
+	g->write_prio = ca->fs->prio_clock[WRITE].hand;
 
 	verify_not_on_freelist(ca, g - ca->buckets);
 	BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets));
@@ -661,13 +673,13 @@
 #define bucket_sort_key(g)						\
 ({									\
 	unsigned long prio = g->read_prio - ca->min_prio[READ];		\
-	prio = (prio * 7) / (ca->set->prio_clock[READ].hand -		\
+	prio = (prio * 7) / (ca->fs->prio_clock[READ].hand -		\
 			     ca->min_prio[READ]);			\
 									\
 	(((prio + 1) * bucket_sectors_used(g)) << 8) | bucket_gc_gen(ca, g);\
 })
 
-static void invalidate_buckets_lru(struct cache *ca)
+static void invalidate_buckets_lru(struct bch_dev *ca)
 {
 	struct bucket_heap_entry e;
 	struct bucket *g;
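The bucket_sort_key() macro above is the LRU heuristic invalidate_buckets_lru() feeds into its heap. A rough function-form rendering of the same arithmetic (hypothetical helper with simplified parameters; it assumes min_prio sits at least one tick behind the hand, which the recalculation above guarantees, so the divisor is never zero):

#include <stdint.h>

static unsigned long bucket_sort_key_sketch(uint16_t read_prio, uint16_t min_prio,
					    uint16_t hand, unsigned sectors_used,
					    uint8_t gc_gen)
{
	/* scale the bucket's read prio onto 0..7 relative to [min_prio, hand] */
	unsigned long prio = (uint16_t) (read_prio - min_prio);

	prio = (prio * 7) / (uint16_t) (hand - min_prio);

	/* hotter and fuller buckets get larger keys; the gc gen occupies the
	 * low byte as a tie-breaker */
	return (((prio + 1) * sectors_used) << 8) | gc_gen;
}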
@@ -677,7 +689,7 @@
 
 	ca->heap.used = 0;
 
-	mutex_lock(&ca->set->bucket_lock);
+	mutex_lock(&ca->fs->bucket_lock);
 	bch_recalc_min_prio(ca, READ);
 	bch_recalc_min_prio(ca, WRITE);
 
@@ -712,11 +724,11 @@
 		bch_invalidate_one_bucket(ca, e.g);
 	}
 
-	mutex_unlock(&ca->set->bucket_lock);
+	mutex_unlock(&ca->fs->bucket_lock);
 	mutex_unlock(&ca->heap_lock);
 }
 
-static void invalidate_buckets_fifo(struct cache *ca)
+static void invalidate_buckets_fifo(struct bch_dev *ca)
 {
 	struct bucket *g;
 	size_t checked = 0;
@@ -736,7 +748,7 @@
 	}
 }
 
-static void invalidate_buckets_random(struct cache *ca)
+static void invalidate_buckets_random(struct bch_dev *ca)
 {
 	struct bucket *g;
 	size_t checked = 0;
@@ -756,7 +768,7 @@
 	}
 }
 
-static void invalidate_buckets(struct cache *ca)
+static void invalidate_buckets(struct bch_dev *ca)
 {
 	ca->inc_gen_needs_gc = 0;
 
@@ -773,7 +785,7 @@
 	}
 }
 
-static bool __bch_allocator_push(struct cache *ca, long bucket)
+static bool __bch_allocator_push(struct bch_dev *ca, long bucket)
 {
 	if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
 		goto success;
@@ -789,11 +801,11 @@
 	return false;
 success:
-	closure_wake_up(&ca->set->freelist_wait);
+	closure_wake_up(&ca->fs->freelist_wait);
 	return true;
 }
 
-static bool bch_allocator_push(struct cache *ca, long bucket)
+static bool bch_allocator_push(struct bch_dev *ca, long bucket)
 {
 	bool ret;
 
@@ -806,7 +818,7 @@
 	return ret;
 }
 
-static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
+static void bch_find_empty_buckets(struct bch_fs *c, struct bch_dev *ca)
 {
 	u16 last_seq_ondisk = c->journal.last_seq_ondisk;
 	struct bucket *g;
@@ -821,8 +833,8 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
 			spin_lock(&ca->freelist_lock);
 
 			bch_mark_alloc_bucket(ca, g, true);
-			g->read_prio = ca->set->prio_clock[READ].hand;
-			g->write_prio = ca->set->prio_clock[WRITE].hand;
+			g->read_prio = c->prio_clock[READ].hand;
+			g->write_prio = c->prio_clock[WRITE].hand;
 
 			verify_not_on_freelist(ca, g - ca->buckets);
 			BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets));
@@ -845,8 +857,8 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
  */
 static int bch_allocator_thread(void *arg)
 {
-	struct cache *ca = arg;
-	struct cache_set *c = ca->set;
+	struct bch_dev *ca = arg;
+	struct bch_fs *c = ca->fs;
 	int ret;
 
 	set_freezable();
@@ -956,7 +968,7 @@
 	}
 out:
 	/*
-	 * Avoid a race with bucket_stats_update() trying to wake us up after
+	 * Avoid a race with bch_usage_update() trying to wake us up after
	 * we've exited:
	 */
	synchronize_rcu();
@@ -970,7 +982,7 @@ out:
  *
  * Returns index of bucket on success, 0 on failure
  * */
-size_t bch_bucket_alloc(struct cache *ca, enum alloc_reserve reserve)
+size_t bch_bucket_alloc(struct bch_dev *ca, enum alloc_reserve reserve)
 {
 	struct bucket *g;
 	long r;
@@ -994,18 +1006,18 @@ out:
 	g = ca->buckets + r;
 
-	g->read_prio = ca->set->prio_clock[READ].hand;
-	g->write_prio = ca->set->prio_clock[WRITE].hand;
+	g->read_prio = ca->fs->prio_clock[READ].hand;
+	g->write_prio = ca->fs->prio_clock[WRITE].hand;
 
 	return r;
 }
 
-static void __bch_bucket_free(struct cache *ca, struct bucket *g)
+static void __bch_bucket_free(struct bch_dev *ca, struct bucket *g)
 {
 	bch_mark_free_bucket(ca, g);
 
-	g->read_prio = ca->set->prio_clock[READ].hand;
-	g->write_prio = ca->set->prio_clock[WRITE].hand;
+	g->read_prio = ca->fs->prio_clock[READ].hand;
+	g->write_prio = ca->fs->prio_clock[WRITE].hand;
 }
 
 enum bucket_alloc_ret {
@@ -1014,17 +1026,17 @@
 	FREELIST_EMPTY,		/* Allocator thread not keeping up */
 };
 
-static void recalc_alloc_group_weights(struct cache_set *c,
-				       struct cache_group *devs)
+static void recalc_alloc_group_weights(struct bch_fs *c,
+				       struct dev_group *devs)
 {
-	struct cache *ca;
+	struct bch_dev *ca;
 	u64 available_buckets = 1; /* avoid a divide by zero... */
 	unsigned i;
 
 	for (i = 0; i < devs->nr; i++) {
 		ca = devs->d[i].dev;
 
-		devs->d[i].weight = buckets_free_cache(ca);
+		devs->d[i].weight = dev_buckets_free(ca);
 		available_buckets += devs->d[i].weight;
 	}
 
@@ -1042,12 +1054,12 @@ static void recalc_alloc_group_weights(struct cache_set *c,
 	}
 }
 
-static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
+static enum bucket_alloc_ret bch_bucket_alloc_group(struct bch_fs *c,
 						    struct open_bucket *ob,
 						    enum alloc_reserve reserve,
 						    unsigned nr_replicas,
-						    struct cache_group *devs,
-						    long *caches_used)
+						    struct dev_group *devs,
+						    long *devs_used)
 {
 	enum bucket_alloc_ret ret;
 	unsigned fail_idx = -1, i;
@@ -1058,19 +1070,18 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
 	if (ob->nr_ptrs >= nr_replicas)
 		return ALLOC_SUCCESS;
 
-	rcu_read_lock();
 	spin_lock(&devs->lock);
 
 	for (i = 0; i < devs->nr; i++)
 		available += !test_bit(devs->d[i].dev->dev_idx,
-				       caches_used);
+				       devs_used);
 
 	recalc_alloc_group_weights(c, devs);
 
 	i = devs->cur_device;
 
 	while (ob->nr_ptrs < nr_replicas) {
-		struct cache *ca;
+		struct bch_dev *ca;
 		u64 bucket;
 
 		if (!available) {
@@ -1087,7 +1098,7 @@
 
 		ca = devs->d[i].dev;
 
-		if (test_bit(ca->dev_idx, caches_used))
+		if (test_bit(ca->dev_idx, devs_used))
 			continue;
 
 		if (fail_idx == -1 &&
@@ -1119,7 +1130,7 @@
 		};
 		ob->ptr_offset[0] = 0;
 
-		__set_bit(ca->dev_idx, caches_used);
+		__set_bit(ca->dev_idx, devs_used);
 		available--;
 		devs->cur_device = i;
 	}
@@ -1128,16 +1139,15 @@
 err:
 	EBUG_ON(ret != ALLOC_SUCCESS && reserve == RESERVE_MOVINGGC);
 	spin_unlock(&devs->lock);
-	rcu_read_unlock();
 	return ret;
 }
 
-static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
+static enum bucket_alloc_ret __bch_bucket_alloc_set(struct bch_fs *c,
 						    struct write_point *wp,
 						    struct open_bucket *ob,
 						    unsigned nr_replicas,
 						    enum alloc_reserve reserve,
-						    long *caches_used)
+						    long *devs_used)
 {
 	struct bch_tier *tier;
 	/*
@@ -1148,28 +1158,28 @@ static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
 	 */
 	if (wp->group)
 		return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
-					      wp->group, caches_used);
+					      wp->group, devs_used);
 
 	/* foreground writes: prefer fastest tier: */
 	tier = READ_ONCE(c->fastest_tier);
 	if (tier)
 		bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
-				       &tier->devs, caches_used);
+				       &tier->devs, devs_used);
 
 	return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
-				      &c->cache_all, caches_used);
+				      &c->all_devs, devs_used);
 }
 
-static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp,
+static int bch_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
 				struct open_bucket *ob, unsigned nr_replicas,
-				enum alloc_reserve reserve, long *caches_used,
+				enum alloc_reserve reserve, long *devs_used,
 				struct closure *cl)
 {
 	bool waiting = false;
 
 	while (1) {
 		switch (__bch_bucket_alloc_set(c, wp, ob, nr_replicas,
-					       reserve, caches_used)) {
+					       reserve, devs_used)) {
 		case ALLOC_SUCCESS:
 			if (waiting)
 				closure_wake_up(&c->freelist_wait);
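recalc_alloc_group_weights() above seeds each member's weight with its free-bucket count and sums them into available_buckets; the normalization that turns those raw counts into selection weights falls outside the lines shown here. A simplified sketch of that kind of proportional weighting, under that assumption and with invented types:

#include <stdint.h>

struct toy_group_dev {
	uint64_t	free_buckets;	/* stand-in for dev_buckets_free() */
	unsigned	weight;
};

static void recalc_weights_sketch(struct toy_group_dev *d, unsigned nr)
{
	uint64_t total = 1;		/* avoid a divide by zero, as above */
	unsigned i;

	for (i = 0; i < nr; i++)
		total += d[i].free_buckets;

	/* assumed normalization: each device's share of the group's free
	 * space in 16.16 fixed point, so emptier devices are picked more often */
	for (i = 0; i < nr; i++)
		d[i].weight = (unsigned) ((d[i].free_buckets << 16) / total);
}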
@@ -1220,17 +1230,17 @@ static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp,
  * reference _after_ doing the index update that makes its allocation reachable.
  */
 
-static void __bch_open_bucket_put(struct cache_set *c, struct open_bucket *ob)
+static void __bch_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
 {
 	const struct bch_extent_ptr *ptr;
-	struct cache *ca;
 
 	lockdep_assert_held(&c->open_buckets_lock);
 
-	rcu_read_lock();
-	open_bucket_for_each_online_device(c, ob, ptr, ca)
+	open_bucket_for_each_ptr(ob, ptr) {
+		struct bch_dev *ca = c->devs[ptr->dev];
+
 		bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), false);
-	rcu_read_unlock();
+	}
 
 	ob->nr_ptrs = 0;
 
@@ -1239,7 +1249,7 @@
 	closure_wake_up(&c->open_buckets_wait);
 }
 
-void bch_open_bucket_put(struct cache_set *c, struct open_bucket *b)
+void bch_open_bucket_put(struct bch_fs *c, struct open_bucket *b)
 {
 	if (atomic_dec_and_test(&b->pin)) {
 		spin_lock(&c->open_buckets_lock);
@@ -1248,7 +1258,7 @@
 	}
 }
 
-static struct open_bucket *bch_open_bucket_get(struct cache_set *c,
+static struct open_bucket *bch_open_bucket_get(struct bch_fs *c,
 					       unsigned nr_reserved,
 					       struct closure *cl)
 {
@@ -1283,12 +1293,13 @@
 	return ret;
 }
 
-static unsigned ob_ptr_sectors_free(struct open_bucket *ob,
-				    struct cache_member_rcu *mi,
+static unsigned ob_ptr_sectors_free(struct bch_fs *c,
+				    struct open_bucket *ob,
 				    struct bch_extent_ptr *ptr)
 {
+	struct bch_dev *ca = c->devs[ptr->dev];
 	unsigned i = ptr - ob->ptrs;
-	unsigned bucket_size = mi->m[ptr->dev].bucket_size;
+	unsigned bucket_size = ca->mi.bucket_size;
 	unsigned used = (ptr->offset & (bucket_size - 1)) +
 		ob->ptr_offset[i];
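ob_ptr_sectors_free() above computes how much of a bucket an open_bucket pointer still has left: with power-of-two bucket sizes, the low bits of the extent pointer's offset give the position inside the bucket, and ptr_offset[] tracks what has already been handed out since. A standalone sketch of that calculation (hypothetical helper, simplified parameters):

#include <stdint.h>

/* bucket_size must be a power of two; callers guarantee used <= bucket_size */
static unsigned sectors_free_in_bucket(uint64_t ptr_offset,
				       unsigned already_allocated,
				       unsigned bucket_size)
{
	unsigned used = (unsigned) (ptr_offset & (bucket_size - 1)) +
		already_allocated;

	return bucket_size - used;
}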
@@ -1297,31 +1308,27 @@
 	return bucket_size - used;
 }
 
-static unsigned open_bucket_sectors_free(struct cache_set *c,
+static unsigned open_bucket_sectors_free(struct bch_fs *c,
 					 struct open_bucket *ob,
 					 unsigned nr_replicas)
 {
-	struct cache_member_rcu *mi = cache_member_info_get(c);
 	unsigned i, sectors_free = UINT_MAX;
 
 	for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++)
 		sectors_free = min(sectors_free,
-				   ob_ptr_sectors_free(ob, mi, &ob->ptrs[i]));
-
-	cache_member_info_put();
+				   ob_ptr_sectors_free(c, ob, &ob->ptrs[i]));
 
 	return sectors_free != UINT_MAX ? sectors_free : 0;
 }
 
-static void open_bucket_copy_unused_ptrs(struct cache_set *c,
+static void open_bucket_copy_unused_ptrs(struct bch_fs *c,
 					 struct open_bucket *new,
 					 struct open_bucket *old)
 {
-	struct cache_member_rcu *mi = cache_member_info_get(c);
 	unsigned i;
 
 	for (i = 0; i < old->nr_ptrs; i++)
-		if (ob_ptr_sectors_free(old, mi, &old->ptrs[i])) {
+		if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) {
 			struct bch_extent_ptr tmp = old->ptrs[i];
 
 			tmp.offset += old->ptr_offset[i];
@@ -1329,25 +1336,24 @@
 			new->ptr_offset[new->nr_ptrs] = 0;
 			new->nr_ptrs++;
 		}
-	cache_member_info_put();
 }
 
-static void verify_not_stale(struct cache_set *c, const struct open_bucket *ob)
+static void verify_not_stale(struct bch_fs *c, const struct open_bucket *ob)
 {
 #ifdef CONFIG_BCACHE_DEBUG
 	const struct bch_extent_ptr *ptr;
-	struct cache *ca;
 
-	rcu_read_lock();
-	open_bucket_for_each_online_device(c, ob, ptr, ca)
+	open_bucket_for_each_ptr(ob, ptr) {
+		struct bch_dev *ca = c->devs[ptr->dev];
+
 		BUG_ON(ptr_stale(ca, ptr));
-	rcu_read_unlock();
+	}
 #endif
 }
 
 /* Sector allocator */
 
-static struct open_bucket *lock_writepoint(struct cache_set *c,
+static struct open_bucket *lock_writepoint(struct bch_fs *c,
 					   struct write_point *wp)
 {
 	struct open_bucket *ob;
@@ -1363,7 +1369,7 @@
 	return ob;
 }
 
-static int open_bucket_add_buckets(struct cache_set *c,
+static int open_bucket_add_buckets(struct bch_fs *c,
 				   struct write_point *wp,
 				   struct open_bucket *ob,
 				   unsigned nr_replicas,
@@ -1371,7 +1377,7 @@
 				   enum alloc_reserve reserve,
 				   struct closure *cl)
 {
-	long caches_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
+	long devs_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
 	unsigned i;
 	int ret;
 
@@ -1386,13 +1392,13 @@
 	if (ob->nr_ptrs >= nr_replicas)
 		return 0;
 
-	memset(caches_used, 0, sizeof(caches_used));
+	memset(devs_used, 0, sizeof(devs_used));
 
 	for (i = 0; i < ob->nr_ptrs; i++)
-		__set_bit(ob->ptrs[i].dev, caches_used);
+		__set_bit(ob->ptrs[i].dev, devs_used);
 
 	ret = bch_bucket_alloc_set(c, wp, ob, nr_replicas,
-				   reserve, caches_used, cl);
+				   reserve, devs_used, cl);
 
 	if (ret == -EROFS &&
 	    ob->nr_ptrs >= nr_replicas_required)
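open_bucket_add_buckets() above zeroes a devs_used bitmap and pre-sets the bit for every device the open bucket already points at, so the allocator underneath never puts two replicas of one extent on the same device. A small sketch of that bookkeeping with plain C bit operations (hypothetical helpers; the kernel code uses __set_bit()/test_bit() on a long array sized by BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)):

#include <limits.h>
#include <stdbool.h>
#include <string.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

static void mark_dev_used(unsigned long *devs_used, unsigned dev_idx)
{
	devs_used[dev_idx / BITS_PER_LONG] |= 1UL << (dev_idx % BITS_PER_LONG);
}

static bool dev_already_used(const unsigned long *devs_used, unsigned dev_idx)
{
	return devs_used[dev_idx / BITS_PER_LONG] &
		(1UL << (dev_idx % BITS_PER_LONG));
}

/* seed the bitmap from the replicas the open bucket already holds */
static void seed_devs_used(unsigned long *devs_used, size_t nlongs,
			   const unsigned *existing_devs, unsigned nr)
{
	unsigned i;

	memset(devs_used, 0, nlongs * sizeof(*devs_used));
	for (i = 0; i < nr; i++)
		mark_dev_used(devs_used, existing_devs[i]);
}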
@@ -1404,7 +1410,7 @@
 /*
  * Get us an open_bucket we can allocate from, return with it locked:
  */
-struct open_bucket *bch_alloc_sectors_start(struct cache_set *c,
+struct open_bucket *bch_alloc_sectors_start(struct bch_fs *c,
 					    struct write_point *wp,
 					    unsigned nr_replicas,
 					    unsigned nr_replicas_required,
@@ -1480,12 +1486,11 @@ retry:
  * Append pointers to the space we just allocated to @k, and mark @sectors space
  * as allocated out of @ob
  */
-void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
+void bch_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e,
 				   unsigned nr_replicas,
 				   struct open_bucket *ob,
 				   unsigned sectors)
 {
 	struct bch_extent_ptr tmp;
-	struct cache *ca;
 	bool has_data = false;
 	unsigned i;
 
@@ -1500,8 +1505,6 @@
 	if (nr_replicas < ob->nr_ptrs)
 		has_data = true;
 
-	rcu_read_lock();
-
 	for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) {
 		EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e),
 					      ob->ptrs[i].dev));
@@ -1512,33 +1515,27 @@
 
 		ob->ptr_offset[i] += sectors;
 
-		if ((ca = PTR_CACHE(c, &ob->ptrs[i])))
-			this_cpu_add(*ca->sectors_written, sectors);
+		this_cpu_add(*c->devs[tmp.dev]->sectors_written, sectors);
 	}
-
-	rcu_read_unlock();
 }
 
 /*
  * Append pointers to the space we just allocated to @k, and mark @sectors space
  * as allocated out of @ob
  */
-void bch_alloc_sectors_done(struct cache_set *c, struct write_point *wp,
+void bch_alloc_sectors_done(struct bch_fs *c, struct write_point *wp,
 			    struct open_bucket *ob)
 {
-	struct cache_member_rcu *mi = cache_member_info_get(c);
 	bool has_data = false;
 	unsigned i;
 
 	for (i = 0; i < ob->nr_ptrs; i++) {
-		if (!ob_ptr_sectors_free(ob, mi, &ob->ptrs[i]))
+		if (!ob_ptr_sectors_free(c, ob, &ob->ptrs[i]))
 			ob->has_full_ptrs = true;
 		else
 			has_data = true;
 	}
 
-	cache_member_info_put();
-
 	if (likely(has_data))
 		atomic_inc(&ob->pin);
 	else
@@ -1559,12 +1556,12 @@
  * - -EAGAIN: closure was added to waitlist
 * - -ENOSPC: out of space and no closure provided
 *
- * @c  - cache set.
+ * @c  - filesystem.
 * @wp - write point to use for allocating sectors.
 * @k  - key to return the allocated space information.
 * @cl - closure to wait for a bucket
 */
-struct open_bucket *bch_alloc_sectors(struct cache_set *c,
+struct open_bucket *bch_alloc_sectors(struct bch_fs *c,
 				      struct write_point *wp,
 				      struct bkey_i_extent *e,
 				      unsigned nr_replicas,
@@ -1592,16 +1589,15 @@
 
 /* Startup/shutdown (ro/rw): */
 
-void bch_recalc_capacity(struct cache_set *c)
+void bch_recalc_capacity(struct bch_fs *c)
 {
 	struct bch_tier *fastest_tier = NULL, *slowest_tier = NULL, *tier;
-	struct cache *ca;
+	struct bch_dev *ca;
 	u64 total_capacity, capacity = 0, reserved_sectors = 0;
 	unsigned long ra_pages = 0;
 	unsigned i, j;
 
-	rcu_read_lock();
-	for_each_cache_rcu(ca, c, i) {
+	for_each_online_member(ca, c, i) {
 		struct backing_dev_info *bdi =
 			blk_get_backing_dev_info(ca->disk_sb.bdev);
@@ -1629,10 +1625,11 @@
 		goto set_capacity;
 
 	/*
-	 * Capacity of the cache set is the capacity of all the devices in the
+	 * Capacity of the filesystem is the capacity of all the devices in the
	 * slowest (highest) tier - we don't include lower tier devices.
	 */
-	group_for_each_cache_rcu(ca, &slowest_tier->devs, i) {
+	spin_lock(&slowest_tier->devs.lock);
+	group_for_each_dev(ca, &slowest_tier->devs, i) {
 		size_t reserve = 0;
 
 		/*
@@ -1668,8 +1665,8 @@
 				      ca->mi.first_bucket) << ca->bucket_bits;
 	}
+	spin_unlock(&slowest_tier->devs.lock);
 set_capacity:
-	rcu_read_unlock();
 	total_capacity = capacity;
 
 	capacity *= (100 - c->opts.gc_reserve_percent);
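For reference, bch_recalc_capacity() above counts only the slowest tier's devices, knocks a per-device reserve off each, and then withholds gc_reserve_percent of the remainder; the final scaling by 1/100 happens in lines this hunk doesn't show, so it is assumed in the sketch below (invented field names, not the bcachefs structures):

#include <stdint.h>

struct toy_tier_dev {
	uint64_t	usable_buckets;		/* nbuckets - first_bucket */
	uint64_t	reserved_buckets;	/* journal/btree/copygc reserve */
	unsigned	bucket_sectors;
};

static uint64_t recalc_capacity_sketch(const struct toy_tier_dev *devs,
				       unsigned nr, unsigned gc_reserve_percent)
{
	uint64_t capacity = 0;
	unsigned i;

	/* only the slowest (largest) tier contributes to usable capacity */
	for (i = 0; i < nr; i++)
		capacity += (devs[i].usable_buckets - devs[i].reserved_buckets) *
			devs[i].bucket_sectors;

	/* hold back the GC reserve (assumed divide by 100) */
	return capacity * (100 - gc_reserve_percent) / 100;
}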
@@ -1695,10 +1692,10 @@
 	closure_wake_up(&c->freelist_wait);
 }
 
-static void bch_stop_write_point(struct cache *ca,
+static void bch_stop_write_point(struct bch_dev *ca,
 				 struct write_point *wp)
 {
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	struct open_bucket *ob;
 	struct bch_extent_ptr *ptr;
@@ -1720,9 +1717,9 @@ found:
 	bch_open_bucket_put(c, ob);
 }
 
-static bool bch_dev_has_open_write_point(struct cache *ca)
+static bool bch_dev_has_open_write_point(struct bch_dev *ca)
 {
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	struct bch_extent_ptr *ptr;
 	struct open_bucket *ob;
@@ -1743,10 +1740,10 @@
 }
 
 /* device goes ro: */
-void bch_dev_allocator_stop(struct cache *ca)
+void bch_dev_allocator_stop(struct bch_dev *ca)
 {
-	struct cache_set *c = ca->set;
-	struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
+	struct bch_fs *c = ca->fs;
+	struct dev_group *tier = &c->tiers[ca->mi.tier].devs;
 	struct task_struct *p;
 	struct closure cl;
 	unsigned i;
@@ -1756,7 +1753,7 @@
 
 	/* First, remove device from allocation groups: */
 	bch_dev_group_remove(tier, ca);
-	bch_dev_group_remove(&c->cache_all, ca);
+	bch_dev_group_remove(&c->all_devs, ca);
 
 	bch_recalc_capacity(c);
@@ -1771,7 +1768,7 @@
 
 	/*
 	 * We need an rcu barrier between setting ca->alloc_thread = NULL and
-	 * the thread shutting down to avoid a race with bucket_stats_update() -
+	 * the thread shutting down to avoid a race with bch_usage_update() -
	 * the allocator thread itself does a synchronize_rcu() on exit.
	 *
	 * XXX: it would be better to have the rcu barrier be asynchronous
@@ -1824,10 +1821,12 @@
 /*
  * Startup the allocator thread for transition to RW mode:
  */
-int bch_dev_allocator_start(struct cache *ca)
+int bch_dev_allocator_start(struct bch_dev *ca)
 {
-	struct cache_set *c = ca->set;
-	struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
+	struct bch_fs *c = ca->fs;
+	struct dev_group *tier = &c->tiers[ca->mi.tier].devs;
+	struct bch_sb_field_journal *journal_buckets;
+	bool has_journal;
 	struct task_struct *k;
 
 	/*
@@ -1844,8 +1843,16 @@
 	ca->alloc_thread = k;
 
 	bch_dev_group_add(tier, ca);
-	bch_dev_group_add(&c->cache_all, ca);
-	bch_dev_group_add(&c->journal.devs, ca);
+	bch_dev_group_add(&c->all_devs, ca);
+
+	mutex_lock(&c->sb_lock);
+	journal_buckets = bch_sb_get_journal(ca->disk_sb.sb);
+	has_journal = bch_nr_journal_buckets(journal_buckets) >=
+		BCH_JOURNAL_BUCKETS_MIN;
+	mutex_unlock(&c->sb_lock);
+
+	if (has_journal)
+		bch_dev_group_add(&c->journal.devs, ca);
 
 	bch_recalc_capacity(c);
@@ -1858,7 +1865,7 @@
 	return 0;
 }
 
-void bch_fs_allocator_init(struct cache_set *c)
+void bch_fs_allocator_init(struct bch_fs *c)
 {
 	unsigned i;
 
@@ -1878,7 +1885,7 @@
 		list_add(&c->open_buckets[i].list, &c->open_buckets_free);
 	}
 
-	spin_lock_init(&c->cache_all.lock);
+	spin_lock_init(&c->all_devs.lock);
 
 	for (i = 0; i < ARRAY_SIZE(c->tiers); i++)
 		spin_lock_init(&c->tiers[i].devs.lock);
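Finally, bch_dev_allocator_start() above now only adds a device to the journal's device group when its superblock describes at least BCH_JOURNAL_BUCKETS_MIN journal buckets, reading the superblock under sb_lock. A condensed sketch of that guard with stand-in types (the constant's value here is made up; only the helper names bch_sb_get_journal() and bch_nr_journal_buckets() come from the hunk above):

#include <stdbool.h>
#include <pthread.h>

#define SKETCH_JOURNAL_BUCKETS_MIN 8	/* stand-in value */

struct toy_sb {
	unsigned	nr_journal_buckets;
};

struct toy_dev {
	struct toy_sb	sb;
};

struct toy_fs {
	pthread_mutex_t	sb_lock;	/* stands in for c->sb_lock */
};

static bool dev_can_carry_journal(struct toy_fs *c, struct toy_dev *ca)
{
	bool has_journal;

	/* superblock fields are only stable under sb_lock, as above */
	pthread_mutex_lock(&c->sb_lock);
	has_journal = ca->sb.nr_journal_buckets >= SKETCH_JOURNAL_BUCKETS_MIN;
	pthread_mutex_unlock(&c->sb_lock);

	return has_journal;
}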