Diffstat (limited to 'libbcache/alloc.c')
-rw-r--r--  libbcache/alloc.c  319
1 file changed, 163 insertions, 156 deletions
diff --git a/libbcache/alloc.c b/libbcache/alloc.c
index a0f8d64f..2392c688 100644
--- a/libbcache/alloc.c
+++ b/libbcache/alloc.c
@@ -39,10 +39,10 @@
* time around, and we garbage collect or rewrite the priorities sooner than we
* would have otherwise.
*
- * bch_bucket_alloc() allocates a single bucket from a specific cache.
+ * bch_bucket_alloc() allocates a single bucket from a specific device.
*
- * bch_bucket_alloc_set() allocates one or more buckets from different caches
- * out of a cache set.
+ * bch_bucket_alloc_set() allocates one or more buckets from different devices
+ * in a given filesystem.
*
* invalidate_buckets() drives all the processes described above. It's called
* from bch_bucket_alloc() and a few other places that need to make sure free
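/*
 * [Editorial sketch -- not part of this patch] A minimal illustration of the
 * single-device entry point described above, using only signatures that appear
 * later in this file (bch_bucket_alloc() returns a bucket index, 0 on failure).
 * The reserve name RESERVE_NONE and the -ENOSPC mapping are assumptions made
 * for the example, not something this commit introduces:
 */
static int example_alloc_one_bucket(struct bch_dev *ca)
{
        size_t b = bch_bucket_alloc(ca, RESERVE_NONE);

        /* 0 means the freelists were empty and the allocator hasn't refilled them yet */
        return b ? 0 : -ENOSPC;
}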
@@ -73,18 +73,19 @@
#include <linux/rcupdate.h>
#include <trace/events/bcache.h>
-static void __bch_bucket_free(struct cache *, struct bucket *);
+static void __bch_bucket_free(struct bch_dev *, struct bucket *);
+static void bch_recalc_min_prio(struct bch_dev *, int);
/* Allocation groups: */
-void bch_dev_group_remove(struct cache_group *grp, struct cache *ca)
+void bch_dev_group_remove(struct dev_group *grp, struct bch_dev *ca)
{
unsigned i;
spin_lock(&grp->lock);
for (i = 0; i < grp->nr; i++)
- if (rcu_access_pointer(grp->d[i].dev) == ca) {
+ if (grp->d[i].dev == ca) {
grp->nr--;
memmove(&grp->d[i],
&grp->d[i + 1],
@@ -95,18 +96,18 @@ void bch_dev_group_remove(struct cache_group *grp, struct cache *ca)
spin_unlock(&grp->lock);
}
-void bch_dev_group_add(struct cache_group *grp, struct cache *ca)
+void bch_dev_group_add(struct dev_group *grp, struct bch_dev *ca)
{
unsigned i;
spin_lock(&grp->lock);
for (i = 0; i < grp->nr; i++)
- if (rcu_access_pointer(grp->d[i].dev) == ca)
+ if (grp->d[i].dev == ca)
goto out;
BUG_ON(grp->nr >= BCH_SB_MEMBERS_MAX);
- rcu_assign_pointer(grp->d[grp->nr++].dev, ca);
+ grp->d[grp->nr++].dev = ca;
out:
spin_unlock(&grp->lock);
}
@@ -115,10 +116,10 @@ out:
static void pd_controllers_update(struct work_struct *work)
{
- struct cache_set *c = container_of(to_delayed_work(work),
- struct cache_set,
+ struct bch_fs *c = container_of(to_delayed_work(work),
+ struct bch_fs,
pd_controllers_update);
- struct cache *ca;
+ struct bch_dev *ca;
unsigned i, iter;
/* All units are in bytes */
@@ -137,14 +138,15 @@ static void pd_controllers_update(struct work_struct *work)
faster_tiers_dirty,
-1);
- group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) {
+ spin_lock(&c->tiers[i].devs.lock);
+ group_for_each_dev(ca, &c->tiers[i].devs, iter) {
struct bch_dev_usage stats = bch_dev_usage_read(ca);
unsigned bucket_bits = ca->bucket_bits + 9;
u64 size = (ca->mi.nbuckets -
ca->mi.first_bucket) << bucket_bits;
u64 dirty = stats.buckets_dirty << bucket_bits;
- u64 free = __buckets_free_cache(ca, stats) << bucket_bits;
+ u64 free = __dev_buckets_free(ca, stats) << bucket_bits;
/*
* Bytes of internal fragmentation, which can be
* reclaimed by copy GC
@@ -172,6 +174,7 @@ static void pd_controllers_update(struct work_struct *work)
copygc_can_free += fragmented;
}
+ spin_unlock(&c->tiers[i].devs.lock);
}
rcu_read_unlock();
@@ -229,7 +232,7 @@ static void pd_controllers_update(struct work_struct *work)
* disk.
*/
-static int prio_io(struct cache *ca, uint64_t bucket, int op)
+static int prio_io(struct bch_dev *ca, uint64_t bucket, int op)
{
bio_init(ca->bio_prio);
bio_set_op_attrs(ca->bio_prio, op, REQ_SYNC|REQ_META);
@@ -254,9 +257,9 @@ static struct nonce prio_nonce(struct prio_set *p)
}};
}
-static int bch_prio_write(struct cache *ca)
+static int bch_prio_write(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
+ struct bch_fs *c = ca->fs;
struct journal *j = &c->journal;
struct journal_res res = { 0 };
bool need_new_journal_entry;
@@ -370,9 +373,9 @@ static int bch_prio_write(struct cache *ca)
return 0;
}
-int bch_prio_read(struct cache *ca)
+int bch_prio_read(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
+ struct bch_fs *c = ca->fs;
struct prio_set *p = ca->disk_buckets;
struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
struct bucket_mark new;
@@ -441,8 +444,15 @@ int bch_prio_read(struct cache *ca)
bucket_cmpxchg(&ca->buckets[b], new, new.gen = d->gen);
}
+
+ mutex_lock(&c->bucket_lock);
+ bch_recalc_min_prio(ca, READ);
+ bch_recalc_min_prio(ca, WRITE);
+ mutex_unlock(&c->bucket_lock);
+
+ ret = 0;
fsck_err:
- return 0;
+ return ret;
}
#define BUCKET_GC_GEN_MAX 96U
@@ -453,9 +463,9 @@ fsck_err:
* If there aren't enough available buckets to fill up free_inc, wait until
* there are.
*/
-static int wait_buckets_available(struct cache *ca)
+static int wait_buckets_available(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
+ struct bch_fs *c = ca->fs;
int ret = 0;
while (1) {
@@ -467,9 +477,9 @@ static int wait_buckets_available(struct cache *ca)
if (ca->inc_gen_needs_gc >= fifo_free(&ca->free_inc)) {
if (c->gc_thread) {
- trace_bcache_gc_cannot_inc_gens(ca->set);
+ trace_bcache_gc_cannot_inc_gens(ca->fs);
atomic_inc(&c->kick_gc);
- wake_up_process(ca->set->gc_thread);
+ wake_up_process(ca->fs->gc_thread);
}
/*
@@ -478,23 +488,23 @@ static int wait_buckets_available(struct cache *ca)
* because we are actually waiting for GC to rewrite
* nodes with stale pointers
*/
- } else if (buckets_available_cache(ca) >=
+ } else if (dev_buckets_available(ca) >=
fifo_free(&ca->free_inc))
break;
- up_read(&ca->set->gc_lock);
+ up_read(&ca->fs->gc_lock);
schedule();
try_to_freeze();
- down_read(&ca->set->gc_lock);
+ down_read(&ca->fs->gc_lock);
}
__set_current_state(TASK_RUNNING);
return ret;
}
-static void verify_not_on_freelist(struct cache *ca, size_t bucket)
+static void verify_not_on_freelist(struct bch_dev *ca, size_t bucket)
{
- if (expensive_debug_checks(ca->set)) {
+ if (expensive_debug_checks(ca->fs)) {
size_t iter;
long i;
unsigned j;
@@ -512,14 +522,16 @@ static void verify_not_on_freelist(struct cache *ca, size_t bucket)
/* Bucket heap / gen */
-void bch_recalc_min_prio(struct cache *ca, int rw)
+void bch_recalc_min_prio(struct bch_dev *ca, int rw)
{
- struct cache_set *c = ca->set;
+ struct bch_fs *c = ca->fs;
struct prio_clock *clock = &c->prio_clock[rw];
struct bucket *g;
u16 max_delta = 1;
unsigned i;
+ lockdep_assert_held(&c->bucket_lock);
+
/* Determine min prio for this particular cache */
for_each_bucket(g, ca)
max_delta = max(max_delta, (u16) (clock->hand - g->prio[rw]));
@@ -532,23 +544,23 @@ void bch_recalc_min_prio(struct cache *ca, int rw)
*/
max_delta = 1;
- for_each_cache(ca, c, i)
+ for_each_member_device(ca, c, i)
max_delta = max(max_delta,
(u16) (clock->hand - ca->min_prio[rw]));
clock->min_prio = clock->hand - max_delta;
}
-static void bch_rescale_prios(struct cache_set *c, int rw)
+static void bch_rescale_prios(struct bch_fs *c, int rw)
{
struct prio_clock *clock = &c->prio_clock[rw];
- struct cache *ca;
+ struct bch_dev *ca;
struct bucket *g;
unsigned i;
trace_bcache_rescale_prios(c);
- for_each_cache(ca, c, i) {
+ for_each_member_device(ca, c, i) {
for_each_bucket(g, ca)
g->prio[rw] = clock->hand -
(clock->hand - g->prio[rw]) / 2;
@@ -561,8 +573,8 @@ static void bch_inc_clock_hand(struct io_timer *timer)
{
struct prio_clock *clock = container_of(timer,
struct prio_clock, rescale);
- struct cache_set *c = container_of(clock,
- struct cache_set, prio_clock[clock->rw]);
+ struct bch_fs *c = container_of(clock,
+ struct bch_fs, prio_clock[clock->rw]);
u64 capacity;
mutex_lock(&c->bucket_lock);
@@ -581,7 +593,7 @@ static void bch_inc_clock_hand(struct io_timer *timer)
return;
/*
- * we only increment when 0.1% of the cache_set has been read
+ * we only increment when 0.1% of the filesystem capacity has been read
* or written too, this determines if it's time
*
* XXX: we shouldn't really be going off of the capacity of devices in
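/*
 * [Editorial note -- not part of this patch] Rough arithmetic for the 0.1%
 * threshold above: on a 1 TB filesystem that is about 1 GB of reads or writes
 * per clock-hand tick. The cheap integer approximation would be
 * capacity >> 10 (1/1024 ~= 0.098%), but the exact expression falls outside
 * this hunk, so treat the shift as an assumption rather than the implementation.
 */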
@@ -593,7 +605,7 @@ static void bch_inc_clock_hand(struct io_timer *timer)
bch_io_timer_add(&c->io_clock[clock->rw], timer);
}
-static void bch_prio_timer_init(struct cache_set *c, int rw)
+static void bch_prio_timer_init(struct bch_fs *c, int rw)
{
struct prio_clock *clock = &c->prio_clock[rw];
struct io_timer *timer = &clock->rescale;
@@ -610,12 +622,12 @@ static void bch_prio_timer_init(struct cache_set *c, int rw)
* them on the various freelists.
*/
-static inline bool can_inc_bucket_gen(struct cache *ca, struct bucket *g)
+static inline bool can_inc_bucket_gen(struct bch_dev *ca, struct bucket *g)
{
return bucket_gc_gen(ca, g) < BUCKET_GC_GEN_MAX;
}
-static bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *g)
+static bool bch_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g)
{
if (!is_available_bucket(READ_ONCE(g->mark)))
return false;
@@ -626,14 +638,14 @@ static bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *g)
return can_inc_bucket_gen(ca, g);
}
-static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *g)
+static void bch_invalidate_one_bucket(struct bch_dev *ca, struct bucket *g)
{
spin_lock(&ca->freelist_lock);
bch_invalidate_bucket(ca, g);
- g->read_prio = ca->set->prio_clock[READ].hand;
- g->write_prio = ca->set->prio_clock[WRITE].hand;
+ g->read_prio = ca->fs->prio_clock[READ].hand;
+ g->write_prio = ca->fs->prio_clock[WRITE].hand;
verify_not_on_freelist(ca, g - ca->buckets);
BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets));
@@ -661,13 +673,13 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *g)
#define bucket_sort_key(g) \
({ \
unsigned long prio = g->read_prio - ca->min_prio[READ]; \
- prio = (prio * 7) / (ca->set->prio_clock[READ].hand - \
+ prio = (prio * 7) / (ca->fs->prio_clock[READ].hand - \
ca->min_prio[READ]); \
\
(((prio + 1) * bucket_sectors_used(g)) << 8) | bucket_gc_gen(ca, g);\
})
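/*
 * [Editorial note -- not part of this patch] Worked example of the sort key
 * above: if the READ clock hand is 1024 ticks past ca->min_prio[READ] and a
 * bucket's read_prio is 512 ticks past it, prio scales to (512 * 7) / 1024 = 3,
 * so with 128 sectors used the key is ((3 + 1) * 128) << 8 = 131072, plus the
 * gc gen in the low byte (bucket_gc_gen() stays below BUCKET_GC_GEN_MAX = 96).
 * Recency only contributes a 0..7 weight; how full the bucket is dominates
 * everything above the low byte.
 */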
-static void invalidate_buckets_lru(struct cache *ca)
+static void invalidate_buckets_lru(struct bch_dev *ca)
{
struct bucket_heap_entry e;
struct bucket *g;
@@ -677,7 +689,7 @@ static void invalidate_buckets_lru(struct cache *ca)
ca->heap.used = 0;
- mutex_lock(&ca->set->bucket_lock);
+ mutex_lock(&ca->fs->bucket_lock);
bch_recalc_min_prio(ca, READ);
bch_recalc_min_prio(ca, WRITE);
@@ -712,11 +724,11 @@ static void invalidate_buckets_lru(struct cache *ca)
bch_invalidate_one_bucket(ca, e.g);
}
- mutex_unlock(&ca->set->bucket_lock);
+ mutex_unlock(&ca->fs->bucket_lock);
mutex_unlock(&ca->heap_lock);
}
-static void invalidate_buckets_fifo(struct cache *ca)
+static void invalidate_buckets_fifo(struct bch_dev *ca)
{
struct bucket *g;
size_t checked = 0;
@@ -736,7 +748,7 @@ static void invalidate_buckets_fifo(struct cache *ca)
}
}
-static void invalidate_buckets_random(struct cache *ca)
+static void invalidate_buckets_random(struct bch_dev *ca)
{
struct bucket *g;
size_t checked = 0;
@@ -756,7 +768,7 @@ static void invalidate_buckets_random(struct cache *ca)
}
}
-static void invalidate_buckets(struct cache *ca)
+static void invalidate_buckets(struct bch_dev *ca)
{
ca->inc_gen_needs_gc = 0;
@@ -773,7 +785,7 @@ static void invalidate_buckets(struct cache *ca)
}
}
-static bool __bch_allocator_push(struct cache *ca, long bucket)
+static bool __bch_allocator_push(struct bch_dev *ca, long bucket)
{
if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
goto success;
@@ -789,11 +801,11 @@ static bool __bch_allocator_push(struct cache *ca, long bucket)
return false;
success:
- closure_wake_up(&ca->set->freelist_wait);
+ closure_wake_up(&ca->fs->freelist_wait);
return true;
}
-static bool bch_allocator_push(struct cache *ca, long bucket)
+static bool bch_allocator_push(struct bch_dev *ca, long bucket)
{
bool ret;
@@ -806,7 +818,7 @@ static bool bch_allocator_push(struct cache *ca, long bucket)
return ret;
}
-static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
+static void bch_find_empty_buckets(struct bch_fs *c, struct bch_dev *ca)
{
u16 last_seq_ondisk = c->journal.last_seq_ondisk;
struct bucket *g;
@@ -821,8 +833,8 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
spin_lock(&ca->freelist_lock);
bch_mark_alloc_bucket(ca, g, true);
- g->read_prio = ca->set->prio_clock[READ].hand;
- g->write_prio = ca->set->prio_clock[WRITE].hand;
+ g->read_prio = c->prio_clock[READ].hand;
+ g->write_prio = c->prio_clock[WRITE].hand;
verify_not_on_freelist(ca, g - ca->buckets);
BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets));
@@ -845,8 +857,8 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
*/
static int bch_allocator_thread(void *arg)
{
- struct cache *ca = arg;
- struct cache_set *c = ca->set;
+ struct bch_dev *ca = arg;
+ struct bch_fs *c = ca->fs;
int ret;
set_freezable();
@@ -956,7 +968,7 @@ static int bch_allocator_thread(void *arg)
}
out:
/*
- * Avoid a race with bucket_stats_update() trying to wake us up after
+ * Avoid a race with bch_usage_update() trying to wake us up after
* we've exited:
*/
synchronize_rcu();
@@ -970,7 +982,7 @@ out:
*
* Returns index of bucket on success, 0 on failure
* */
-size_t bch_bucket_alloc(struct cache *ca, enum alloc_reserve reserve)
+size_t bch_bucket_alloc(struct bch_dev *ca, enum alloc_reserve reserve)
{
struct bucket *g;
long r;
@@ -994,18 +1006,18 @@ out:
g = ca->buckets + r;
- g->read_prio = ca->set->prio_clock[READ].hand;
- g->write_prio = ca->set->prio_clock[WRITE].hand;
+ g->read_prio = ca->fs->prio_clock[READ].hand;
+ g->write_prio = ca->fs->prio_clock[WRITE].hand;
return r;
}
-static void __bch_bucket_free(struct cache *ca, struct bucket *g)
+static void __bch_bucket_free(struct bch_dev *ca, struct bucket *g)
{
bch_mark_free_bucket(ca, g);
- g->read_prio = ca->set->prio_clock[READ].hand;
- g->write_prio = ca->set->prio_clock[WRITE].hand;
+ g->read_prio = ca->fs->prio_clock[READ].hand;
+ g->write_prio = ca->fs->prio_clock[WRITE].hand;
}
enum bucket_alloc_ret {
@@ -1014,17 +1026,17 @@ enum bucket_alloc_ret {
FREELIST_EMPTY, /* Allocator thread not keeping up */
};
-static void recalc_alloc_group_weights(struct cache_set *c,
- struct cache_group *devs)
+static void recalc_alloc_group_weights(struct bch_fs *c,
+ struct dev_group *devs)
{
- struct cache *ca;
+ struct bch_dev *ca;
u64 available_buckets = 1; /* avoid a divide by zero... */
unsigned i;
for (i = 0; i < devs->nr; i++) {
ca = devs->d[i].dev;
- devs->d[i].weight = buckets_free_cache(ca);
+ devs->d[i].weight = dev_buckets_free(ca);
available_buckets += devs->d[i].weight;
}
@@ -1042,12 +1054,12 @@ static void recalc_alloc_group_weights(struct cache_set *c,
}
}
-static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
+static enum bucket_alloc_ret bch_bucket_alloc_group(struct bch_fs *c,
struct open_bucket *ob,
enum alloc_reserve reserve,
unsigned nr_replicas,
- struct cache_group *devs,
- long *caches_used)
+ struct dev_group *devs,
+ long *devs_used)
{
enum bucket_alloc_ret ret;
unsigned fail_idx = -1, i;
@@ -1058,19 +1070,18 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
if (ob->nr_ptrs >= nr_replicas)
return ALLOC_SUCCESS;
- rcu_read_lock();
spin_lock(&devs->lock);
for (i = 0; i < devs->nr; i++)
available += !test_bit(devs->d[i].dev->dev_idx,
- caches_used);
+ devs_used);
recalc_alloc_group_weights(c, devs);
i = devs->cur_device;
while (ob->nr_ptrs < nr_replicas) {
- struct cache *ca;
+ struct bch_dev *ca;
u64 bucket;
if (!available) {
@@ -1087,7 +1098,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
ca = devs->d[i].dev;
- if (test_bit(ca->dev_idx, caches_used))
+ if (test_bit(ca->dev_idx, devs_used))
continue;
if (fail_idx == -1 &&
@@ -1119,7 +1130,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
};
ob->ptr_offset[0] = 0;
- __set_bit(ca->dev_idx, caches_used);
+ __set_bit(ca->dev_idx, devs_used);
available--;
devs->cur_device = i;
}
@@ -1128,16 +1139,15 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
err:
EBUG_ON(ret != ALLOC_SUCCESS && reserve == RESERVE_MOVINGGC);
spin_unlock(&devs->lock);
- rcu_read_unlock();
return ret;
}
-static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
+static enum bucket_alloc_ret __bch_bucket_alloc_set(struct bch_fs *c,
struct write_point *wp,
struct open_bucket *ob,
unsigned nr_replicas,
enum alloc_reserve reserve,
- long *caches_used)
+ long *devs_used)
{
struct bch_tier *tier;
/*
@@ -1148,28 +1158,28 @@ static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
*/
if (wp->group)
return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
- wp->group, caches_used);
+ wp->group, devs_used);
/* foreground writes: prefer fastest tier: */
tier = READ_ONCE(c->fastest_tier);
if (tier)
bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
- &tier->devs, caches_used);
+ &tier->devs, devs_used);
return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
- &c->cache_all, caches_used);
+ &c->all_devs, devs_used);
}
-static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp,
+static int bch_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
struct open_bucket *ob, unsigned nr_replicas,
- enum alloc_reserve reserve, long *caches_used,
+ enum alloc_reserve reserve, long *devs_used,
struct closure *cl)
{
bool waiting = false;
while (1) {
switch (__bch_bucket_alloc_set(c, wp, ob, nr_replicas,
- reserve, caches_used)) {
+ reserve, devs_used)) {
case ALLOC_SUCCESS:
if (waiting)
closure_wake_up(&c->freelist_wait);
@@ -1220,17 +1230,17 @@ static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp,
* reference _after_ doing the index update that makes its allocation reachable.
*/
-static void __bch_open_bucket_put(struct cache_set *c, struct open_bucket *ob)
+static void __bch_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
{
const struct bch_extent_ptr *ptr;
- struct cache *ca;
lockdep_assert_held(&c->open_buckets_lock);
- rcu_read_lock();
- open_bucket_for_each_online_device(c, ob, ptr, ca)
+ open_bucket_for_each_ptr(ob, ptr) {
+ struct bch_dev *ca = c->devs[ptr->dev];
+
bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), false);
- rcu_read_unlock();
+ }
ob->nr_ptrs = 0;
@@ -1239,7 +1249,7 @@ static void __bch_open_bucket_put(struct cache_set *c, struct open_bucket *ob)
closure_wake_up(&c->open_buckets_wait);
}
-void bch_open_bucket_put(struct cache_set *c, struct open_bucket *b)
+void bch_open_bucket_put(struct bch_fs *c, struct open_bucket *b)
{
if (atomic_dec_and_test(&b->pin)) {
spin_lock(&c->open_buckets_lock);
@@ -1248,7 +1258,7 @@ void bch_open_bucket_put(struct cache_set *c, struct open_bucket *b)
}
}
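/*
 * [Editorial sketch -- not part of this patch] The lifetime rule hinted at by
 * the (truncated) comment above this hunk: whoever is still writing out of an
 * open bucket holds a pin on it, and that pin is released with
 * bch_open_bucket_put() only after the index update that makes the allocation
 * reachable, so the bucket can't be invalidated under an unpublished extent.
 * A hedged caller-side outline, assuming the pin was taken when the bucket was
 * handed out:
 */
static void example_publish_then_put(struct bch_fs *c, struct open_bucket *ob)
{
        /* ... index/btree update that publishes the allocated extent ... */

        bch_open_bucket_put(c, ob);     /* drop our pin; once the last pin goes, ob can be recycled */
}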
-static struct open_bucket *bch_open_bucket_get(struct cache_set *c,
+static struct open_bucket *bch_open_bucket_get(struct bch_fs *c,
unsigned nr_reserved,
struct closure *cl)
{
@@ -1283,12 +1293,13 @@ static struct open_bucket *bch_open_bucket_get(struct cache_set *c,
return ret;
}
-static unsigned ob_ptr_sectors_free(struct open_bucket *ob,
- struct cache_member_rcu *mi,
+static unsigned ob_ptr_sectors_free(struct bch_fs *c,
+ struct open_bucket *ob,
struct bch_extent_ptr *ptr)
{
+ struct bch_dev *ca = c->devs[ptr->dev];
unsigned i = ptr - ob->ptrs;
- unsigned bucket_size = mi->m[ptr->dev].bucket_size;
+ unsigned bucket_size = ca->mi.bucket_size;
unsigned used = (ptr->offset & (bucket_size - 1)) +
ob->ptr_offset[i];
@@ -1297,31 +1308,27 @@ static unsigned ob_ptr_sectors_free(struct open_bucket *ob,
return bucket_size - used;
}
-static unsigned open_bucket_sectors_free(struct cache_set *c,
+static unsigned open_bucket_sectors_free(struct bch_fs *c,
struct open_bucket *ob,
unsigned nr_replicas)
{
- struct cache_member_rcu *mi = cache_member_info_get(c);
unsigned i, sectors_free = UINT_MAX;
for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++)
sectors_free = min(sectors_free,
- ob_ptr_sectors_free(ob, mi, &ob->ptrs[i]));
-
- cache_member_info_put();
+ ob_ptr_sectors_free(c, ob, &ob->ptrs[i]));
return sectors_free != UINT_MAX ? sectors_free : 0;
}
-static void open_bucket_copy_unused_ptrs(struct cache_set *c,
+static void open_bucket_copy_unused_ptrs(struct bch_fs *c,
struct open_bucket *new,
struct open_bucket *old)
{
- struct cache_member_rcu *mi = cache_member_info_get(c);
unsigned i;
for (i = 0; i < old->nr_ptrs; i++)
- if (ob_ptr_sectors_free(old, mi, &old->ptrs[i])) {
+ if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) {
struct bch_extent_ptr tmp = old->ptrs[i];
tmp.offset += old->ptr_offset[i];
@@ -1329,25 +1336,24 @@ static void open_bucket_copy_unused_ptrs(struct cache_set *c,
new->ptr_offset[new->nr_ptrs] = 0;
new->nr_ptrs++;
}
- cache_member_info_put();
}
-static void verify_not_stale(struct cache_set *c, const struct open_bucket *ob)
+static void verify_not_stale(struct bch_fs *c, const struct open_bucket *ob)
{
#ifdef CONFIG_BCACHE_DEBUG
const struct bch_extent_ptr *ptr;
- struct cache *ca;
- rcu_read_lock();
- open_bucket_for_each_online_device(c, ob, ptr, ca)
+ open_bucket_for_each_ptr(ob, ptr) {
+ struct bch_dev *ca = c->devs[ptr->dev];
+
BUG_ON(ptr_stale(ca, ptr));
- rcu_read_unlock();
+ }
#endif
}
/* Sector allocator */
-static struct open_bucket *lock_writepoint(struct cache_set *c,
+static struct open_bucket *lock_writepoint(struct bch_fs *c,
struct write_point *wp)
{
struct open_bucket *ob;
@@ -1363,7 +1369,7 @@ static struct open_bucket *lock_writepoint(struct cache_set *c,
return ob;
}
-static int open_bucket_add_buckets(struct cache_set *c,
+static int open_bucket_add_buckets(struct bch_fs *c,
struct write_point *wp,
struct open_bucket *ob,
unsigned nr_replicas,
@@ -1371,7 +1377,7 @@ static int open_bucket_add_buckets(struct cache_set *c,
enum alloc_reserve reserve,
struct closure *cl)
{
- long caches_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
+ long devs_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
unsigned i;
int ret;
@@ -1386,13 +1392,13 @@ static int open_bucket_add_buckets(struct cache_set *c,
if (ob->nr_ptrs >= nr_replicas)
return 0;
- memset(caches_used, 0, sizeof(caches_used));
+ memset(devs_used, 0, sizeof(devs_used));
for (i = 0; i < ob->nr_ptrs; i++)
- __set_bit(ob->ptrs[i].dev, caches_used);
+ __set_bit(ob->ptrs[i].dev, devs_used);
ret = bch_bucket_alloc_set(c, wp, ob, nr_replicas,
- reserve, caches_used, cl);
+ reserve, devs_used, cl);
if (ret == -EROFS &&
ob->nr_ptrs >= nr_replicas_required)
@@ -1404,7 +1410,7 @@ static int open_bucket_add_buckets(struct cache_set *c,
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
-struct open_bucket *bch_alloc_sectors_start(struct cache_set *c,
+struct open_bucket *bch_alloc_sectors_start(struct bch_fs *c,
struct write_point *wp,
unsigned nr_replicas,
unsigned nr_replicas_required,
@@ -1480,12 +1486,11 @@ retry:
* Append pointers to the space we just allocated to @k, and mark @sectors space
* as allocated out of @ob
*/
-void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
+void bch_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e,
unsigned nr_replicas, struct open_bucket *ob,
unsigned sectors)
{
struct bch_extent_ptr tmp;
- struct cache *ca;
bool has_data = false;
unsigned i;
@@ -1500,8 +1505,6 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
if (nr_replicas < ob->nr_ptrs)
has_data = true;
- rcu_read_lock();
-
for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) {
EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
@@ -1512,33 +1515,27 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
ob->ptr_offset[i] += sectors;
- if ((ca = PTR_CACHE(c, &ob->ptrs[i])))
- this_cpu_add(*ca->sectors_written, sectors);
+ this_cpu_add(*c->devs[tmp.dev]->sectors_written, sectors);
}
-
- rcu_read_unlock();
}
/*
* Append pointers to the space we just allocated to @k, and mark @sectors space
* as allocated out of @ob
*/
-void bch_alloc_sectors_done(struct cache_set *c, struct write_point *wp,
+void bch_alloc_sectors_done(struct bch_fs *c, struct write_point *wp,
struct open_bucket *ob)
{
- struct cache_member_rcu *mi = cache_member_info_get(c);
bool has_data = false;
unsigned i;
for (i = 0; i < ob->nr_ptrs; i++) {
- if (!ob_ptr_sectors_free(ob, mi, &ob->ptrs[i]))
+ if (!ob_ptr_sectors_free(c, ob, &ob->ptrs[i]))
ob->has_full_ptrs = true;
else
has_data = true;
}
- cache_member_info_put();
-
if (likely(has_data))
atomic_inc(&ob->pin);
else
@@ -1559,12 +1556,12 @@ void bch_alloc_sectors_done(struct cache_set *c, struct write_point *wp,
* - -EAGAIN: closure was added to waitlist
* - -ENOSPC: out of space and no closure provided
*
- * @c - cache set.
+ * @c - filesystem.
* @wp - write point to use for allocating sectors.
* @k - key to return the allocated space information.
* @cl - closure to wait for a bucket
*/
-struct open_bucket *bch_alloc_sectors(struct cache_set *c,
+struct open_bucket *bch_alloc_sectors(struct bch_fs *c,
struct write_point *wp,
struct bkey_i_extent *e,
unsigned nr_replicas,
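/*
 * [Editorial sketch -- not part of this patch] The return convention documented
 * above, assuming (it is not shown in this hunk) that the -EAGAIN/-ENOSPC codes
 * come back as ERR_PTR() values through the open_bucket pointer:
 */
static int example_check_alloc(struct open_bucket *ob)
{
        if (ob == ERR_PTR(-EAGAIN))
                return -EAGAIN;         /* @cl was added to a waitlist; retry after it fires */
        if (IS_ERR(ob))
                return PTR_ERR(ob);     /* e.g. -ENOSPC: out of space and no closure provided */
        return 0;                       /* success: ob is the open_bucket to write into */
}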
@@ -1592,16 +1589,15 @@ struct open_bucket *bch_alloc_sectors(struct cache_set *c,
/* Startup/shutdown (ro/rw): */
-void bch_recalc_capacity(struct cache_set *c)
+void bch_recalc_capacity(struct bch_fs *c)
{
struct bch_tier *fastest_tier = NULL, *slowest_tier = NULL, *tier;
- struct cache *ca;
+ struct bch_dev *ca;
u64 total_capacity, capacity = 0, reserved_sectors = 0;
unsigned long ra_pages = 0;
unsigned i, j;
- rcu_read_lock();
- for_each_cache_rcu(ca, c, i) {
+ for_each_online_member(ca, c, i) {
struct backing_dev_info *bdi =
blk_get_backing_dev_info(ca->disk_sb.bdev);
@@ -1629,10 +1625,11 @@ void bch_recalc_capacity(struct cache_set *c)
goto set_capacity;
/*
- * Capacity of the cache set is the capacity of all the devices in the
+ * Capacity of the filesystem is the capacity of all the devices in the
* slowest (highest) tier - we don't include lower tier devices.
*/
- group_for_each_cache_rcu(ca, &slowest_tier->devs, i) {
+ spin_lock(&slowest_tier->devs.lock);
+ group_for_each_dev(ca, &slowest_tier->devs, i) {
size_t reserve = 0;
/*
@@ -1668,8 +1665,8 @@ void bch_recalc_capacity(struct cache_set *c)
ca->mi.first_bucket) <<
ca->bucket_bits;
}
+ spin_unlock(&slowest_tier->devs.lock);
set_capacity:
- rcu_read_unlock();
total_capacity = capacity;
capacity *= (100 - c->opts.gc_reserve_percent);
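/*
 * [Editorial note -- not part of this patch] Worked example of the reserve
 * math above: with 1,000,000 sectors of capacity in the slowest tier and
 * gc_reserve_percent = 10, the line above yields 1,000,000 * 90, i.e. 900,000
 * usable sectors once the divide by 100 (presumably just past this hunk) is
 * applied; the withheld 10% is kept free so copy GC always has destination
 * buckets to move data into.
 */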
@@ -1695,10 +1692,10 @@ set_capacity:
closure_wake_up(&c->freelist_wait);
}
-static void bch_stop_write_point(struct cache *ca,
+static void bch_stop_write_point(struct bch_dev *ca,
struct write_point *wp)
{
- struct cache_set *c = ca->set;
+ struct bch_fs *c = ca->fs;
struct open_bucket *ob;
struct bch_extent_ptr *ptr;
@@ -1720,9 +1717,9 @@ found:
bch_open_bucket_put(c, ob);
}
-static bool bch_dev_has_open_write_point(struct cache *ca)
+static bool bch_dev_has_open_write_point(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
+ struct bch_fs *c = ca->fs;
struct bch_extent_ptr *ptr;
struct open_bucket *ob;
@@ -1743,10 +1740,10 @@ static bool bch_dev_has_open_write_point(struct cache *ca)
}
/* device goes ro: */
-void bch_dev_allocator_stop(struct cache *ca)
+void bch_dev_allocator_stop(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
- struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
+ struct bch_fs *c = ca->fs;
+ struct dev_group *tier = &c->tiers[ca->mi.tier].devs;
struct task_struct *p;
struct closure cl;
unsigned i;
@@ -1756,7 +1753,7 @@ void bch_dev_allocator_stop(struct cache *ca)
/* First, remove device from allocation groups: */
bch_dev_group_remove(tier, ca);
- bch_dev_group_remove(&c->cache_all, ca);
+ bch_dev_group_remove(&c->all_devs, ca);
bch_recalc_capacity(c);
@@ -1771,7 +1768,7 @@ void bch_dev_allocator_stop(struct cache *ca)
/*
* We need an rcu barrier between setting ca->alloc_thread = NULL and
- * the thread shutting down to avoid a race with bucket_stats_update() -
+ * the thread shutting down to avoid a race with bch_usage_update() -
* the allocator thread itself does a synchronize_rcu() on exit.
*
* XXX: it would be better to have the rcu barrier be asynchronous
@@ -1824,10 +1821,12 @@ void bch_dev_allocator_stop(struct cache *ca)
/*
* Startup the allocator thread for transition to RW mode:
*/
-int bch_dev_allocator_start(struct cache *ca)
+int bch_dev_allocator_start(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
- struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
+ struct bch_fs *c = ca->fs;
+ struct dev_group *tier = &c->tiers[ca->mi.tier].devs;
+ struct bch_sb_field_journal *journal_buckets;
+ bool has_journal;
struct task_struct *k;
/*
@@ -1844,8 +1843,16 @@ int bch_dev_allocator_start(struct cache *ca)
ca->alloc_thread = k;
bch_dev_group_add(tier, ca);
- bch_dev_group_add(&c->cache_all, ca);
- bch_dev_group_add(&c->journal.devs, ca);
+ bch_dev_group_add(&c->all_devs, ca);
+
+ mutex_lock(&c->sb_lock);
+ journal_buckets = bch_sb_get_journal(ca->disk_sb.sb);
+ has_journal = bch_nr_journal_buckets(journal_buckets) >=
+ BCH_JOURNAL_BUCKETS_MIN;
+ mutex_unlock(&c->sb_lock);
+
+ if (has_journal)
+ bch_dev_group_add(&c->journal.devs, ca);
bch_recalc_capacity(c);
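/*
 * [Editorial note -- not part of this patch] The hunk above makes membership in
 * c->journal.devs conditional: a device is added as a journal allocation target
 * only if its superblock already describes at least BCH_JOURNAL_BUCKETS_MIN
 * journal buckets, rather than being added unconditionally.
 */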
@@ -1858,7 +1865,7 @@ int bch_dev_allocator_start(struct cache *ca)
return 0;
}
-void bch_fs_allocator_init(struct cache_set *c)
+void bch_fs_allocator_init(struct bch_fs *c)
{
unsigned i;
@@ -1878,7 +1885,7 @@ void bch_fs_allocator_init(struct cache_set *c)
list_add(&c->open_buckets[i].list, &c->open_buckets_free);
}
- spin_lock_init(&c->cache_all.lock);
+ spin_lock_init(&c->all_devs.lock);
for (i = 0; i < ARRAY_SIZE(c->tiers); i++)
spin_lock_init(&c->tiers[i].devs.lock);