path: root/libbcachefs/alloc.c
author	Kent Overstreet <kent.overstreet@gmail.com>	2017-11-22 00:42:55 -0500
committer	Kent Overstreet <kent.overstreet@gmail.com>	2017-11-22 00:50:47 -0500
commit	22291ae84a029d65334d1a90b67b5031f45cd540 (patch)
tree	ab9fefe205577324915545b21535fcccbff89f48 /libbcachefs/alloc.c
parent	74cb92203293a8d5b16b078389f6b3dba5300e89 (diff)
Update bcachefs sources to 9e7ae5219c bcachefs: Make write points more dynamic
Diffstat (limited to 'libbcachefs/alloc.c')
-rw-r--r--	libbcachefs/alloc.c	600
1 file changed, 371 insertions(+), 229 deletions(-)
diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c
index a1086576..dc7348fc 100644
--- a/libbcachefs/alloc.c
+++ b/libbcachefs/alloc.c
@@ -70,6 +70,7 @@
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/random.h>
+#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
#include <linux/sort.h>
@@ -1118,6 +1119,7 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
{
enum bucket_alloc_ret ret = NO_DEVICES;
struct dev_alloc_list devs_sorted;
+ u64 buckets_free;
unsigned i;
BUG_ON(nr_replicas > ARRAY_SIZE(ob->ptrs));
@@ -1127,46 +1129,55 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
rcu_read_lock();
devs_sorted = bch2_wp_alloc_list(c, wp, devs);
+ spin_lock(&ob->lock);
for (i = 0; i < devs_sorted.nr; i++) {
struct bch_dev *ca =
rcu_dereference(c->devs[devs_sorted.devs[i]]);
- long bucket;
+ struct open_bucket_ptr ptr;
if (!ca)
continue;
- bucket = bch2_bucket_alloc(c, ca, reserve);
- if (bucket < 0) {
- ret = FREELIST_EMPTY;
- continue;
- }
-
- wp->next_alloc[ca->dev_idx] +=
- div64_u64(U64_MAX, dev_buckets_free(ca) *
- ca->mi.bucket_size);
- bch2_wp_rescale(c, ca, wp);
+ if (wp->type == BCH_DATA_USER &&
+ ca->open_buckets_partial_nr) {
+ ptr = ca->open_buckets_partial[--ca->open_buckets_partial_nr];
+ } else {
+ long bucket = bch2_bucket_alloc(c, ca, reserve);
+ if (bucket < 0) {
+ ret = FREELIST_EMPTY;
+ continue;
+ }
- __clear_bit(ca->dev_idx, devs->d);
+ ptr = (struct open_bucket_ptr) {
+ .ptr.gen = ca->buckets[bucket].mark.gen,
+ .ptr.offset = bucket_to_sector(ca, bucket),
+ .ptr.dev = ca->dev_idx,
+ .sectors_free = ca->mi.bucket_size,
+ };
+ }
/*
* open_bucket_add_buckets expects new pointers at the head of
* the list:
*/
- BUG_ON(ob->nr_ptrs >= BCH_REPLICAS_MAX);
+ BUG_ON(ob->nr_ptrs >= ARRAY_SIZE(ob->ptrs));
memmove(&ob->ptrs[1],
&ob->ptrs[0],
ob->nr_ptrs * sizeof(ob->ptrs[0]));
- memmove(&ob->ptr_offset[1],
- &ob->ptr_offset[0],
- ob->nr_ptrs * sizeof(ob->ptr_offset[0]));
ob->nr_ptrs++;
- ob->ptrs[0] = (struct bch_extent_ptr) {
- .gen = ca->buckets[bucket].mark.gen,
- .offset = bucket_to_sector(ca, bucket),
- .dev = ca->dev_idx,
- };
- ob->ptr_offset[0] = 0;
+ ob->ptrs[0] = ptr;
+
+	buckets_free = dev_buckets_free(ca);
+ if (buckets_free)
+ wp->next_alloc[ca->dev_idx] +=
+ div64_u64(U64_MAX, buckets_free *
+ ca->mi.bucket_size);
+ else
+ wp->next_alloc[ca->dev_idx] = U64_MAX;
+ bch2_wp_rescale(c, ca, wp);
+
+ __clear_bit(ca->dev_idx, devs->d);
if (ob->nr_ptrs == nr_replicas) {
ret = ALLOC_SUCCESS;
@@ -1175,6 +1186,7 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
}
EBUG_ON(ret != ALLOC_SUCCESS && reserve == RESERVE_MOVINGGC);
+ spin_unlock(&ob->lock);
rcu_read_unlock();
return ret;
}
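
The next_alloc bookkeeping above is a weighted round-robin: each time a device is picked, its counter advances by U64_MAX divided by its free space, so emptier devices advance more slowly and keep sorting to the front of bch2_wp_alloc_list(). A minimal userspace sketch of that behaviour (the device sizes, tie-break, and 100-pick loop are made up for illustration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t free_sectors[2] = { 1000000, 250000 };	/* hypothetical devices */
	uint64_t next_alloc[2]   = { 0, 0 };
	unsigned picks[2] = { 0, 0 }, i;

	for (i = 0; i < 100; i++) {
		/* pick the device whose counter is furthest behind: */
		unsigned dev = next_alloc[0] <= next_alloc[1] ? 0 : 1;

		picks[dev]++;
		next_alloc[dev] += free_sectors[dev]
			? UINT64_MAX / free_sectors[dev]
			: UINT64_MAX;
	}

	/* expect roughly a 4:1 split in favour of the emptier device */
	printf("dev 0: %u picks, dev 1: %u picks\n", picks[0], picks[1]);
	return 0;
}
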
@@ -1242,24 +1254,45 @@ static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
{
- const struct bch_extent_ptr *ptr;
+ const struct open_bucket_ptr *ptr;
u8 new_ob;
if (!atomic_dec_and_test(&ob->pin))
return;
- spin_lock(&c->open_buckets_lock);
+ down_read(&c->alloc_gc_lock);
+ spin_lock(&ob->lock);
+
open_bucket_for_each_ptr(ob, ptr) {
- struct bch_dev *ca = c->devs[ptr->dev];
+ struct bch_dev *ca = c->devs[ptr->ptr.dev];
- bch2_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), false);
+ if (ptr->sectors_free) {
+ /*
+ * This is a ptr to a bucket that still has free space,
+ * but we don't want to use it
+ */
+ BUG_ON(ca->open_buckets_partial_nr >=
+ ARRAY_SIZE(ca->open_buckets_partial));
+
+ spin_lock(&ca->freelist_lock);
+ ca->open_buckets_partial[ca->open_buckets_partial_nr++]
+ = *ptr;
+ spin_unlock(&ca->freelist_lock);
+ } else {
+ bch2_mark_alloc_bucket(ca, PTR_BUCKET(ca, &ptr->ptr), false);
+ }
}
-
ob->nr_ptrs = 0;
+
+ spin_unlock(&ob->lock);
+ up_read(&c->alloc_gc_lock);
+
new_ob = ob->new_ob;
ob->new_ob = 0;
- list_move(&ob->list, &c->open_buckets_free);
+ spin_lock(&c->open_buckets_lock);
+ ob->freelist = c->open_buckets_freelist;
+ c->open_buckets_freelist = ob - c->open_buckets;
c->open_buckets_nr_free++;
spin_unlock(&c->open_buckets_lock);
@@ -1270,22 +1303,19 @@ void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
}
static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c,
- unsigned nr_reserved,
- struct closure *cl)
+ unsigned nr_reserved,
+ struct closure *cl)
{
struct open_bucket *ret;
spin_lock(&c->open_buckets_lock);
if (c->open_buckets_nr_free > nr_reserved) {
- BUG_ON(list_empty(&c->open_buckets_free));
- ret = list_first_entry(&c->open_buckets_free,
- struct open_bucket, list);
- list_move(&ret->list, &c->open_buckets_open);
- BUG_ON(ret->nr_ptrs);
+ BUG_ON(!c->open_buckets_freelist);
+ ret = c->open_buckets + c->open_buckets_freelist;
+ c->open_buckets_freelist = ret->freelist;
atomic_set(&ret->pin, 1); /* XXX */
- ret->has_full_ptrs = false;
BUG_ON(ret->new_ob);
BUG_ON(ret->nr_ptrs);
@@ -1307,148 +1337,259 @@ static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c,
return ret;
}
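
Open buckets now sit on an index-based freelist (the freelist / open_buckets_freelist fields above) instead of a list_head, with index 0 reserved as a NULL sentinel. A self-contained sketch of the same push/pop scheme, with a made-up array size:

#include <assert.h>
#include <stddef.h>

#define NR_OPEN_BUCKETS 8

struct ob {
	unsigned char	freelist;	/* index of the next free entry, 0 == end */
	/* ... */
};

static struct ob	buckets[NR_OPEN_BUCKETS];
static unsigned char	freelist;	/* index of the first free entry, 0 == empty */

static void ob_put(struct ob *ob)
{
	/* push onto the head of the freelist: */
	ob->freelist = freelist;
	freelist = (unsigned char) (ob - buckets);
}

static struct ob *ob_get(void)
{
	struct ob *ob;

	if (!freelist)
		return NULL;

	/* pop the head of the freelist: */
	ob = &buckets[freelist];
	freelist = ob->freelist;
	return ob;
}

int main(void)
{
	unsigned i;

	/* bucket 0 is the sentinel and never goes on the freelist: */
	for (i = 1; i < NR_OPEN_BUCKETS; i++)
		ob_put(&buckets[i]);

	assert(ob_get() == &buckets[NR_OPEN_BUCKETS - 1]);
	assert(ob_get() == &buckets[NR_OPEN_BUCKETS - 2]);
	return 0;
}
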
-static unsigned ob_ptr_sectors_free(struct bch_fs *c,
- struct open_bucket *ob,
- struct bch_extent_ptr *ptr)
-{
- struct bch_dev *ca = c->devs[ptr->dev];
- unsigned i = ptr - ob->ptrs;
- unsigned used = bucket_remainder(ca, ptr->offset) +
- ob->ptr_offset[i];
-
- BUG_ON(used > ca->mi.bucket_size);
-
- return ca->mi.bucket_size - used;
-}
-
static unsigned open_bucket_sectors_free(struct bch_fs *c,
struct open_bucket *ob,
unsigned nr_replicas)
{
- unsigned i, sectors_free = UINT_MAX;
+ unsigned sectors_free = UINT_MAX;
+ struct open_bucket_ptr *ptr;
- for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++)
- sectors_free = min(sectors_free,
- ob_ptr_sectors_free(c, ob, &ob->ptrs[i]));
+ open_bucket_for_each_ptr(ob, ptr)
+ sectors_free = min(sectors_free, ptr->sectors_free);
return sectors_free != UINT_MAX ? sectors_free : 0;
}
-static void open_bucket_copy_unused_ptrs(struct bch_fs *c,
- struct open_bucket *new,
- struct open_bucket *old)
+static void open_bucket_move_ptrs(struct bch_fs *c,
+ struct open_bucket *dst,
+ struct open_bucket *src,
+ struct bch_devs_mask *devs,
+ unsigned nr_ptrs_dislike)
{
bool moved_ptr = false;
int i;
- for (i = old->nr_ptrs - 1; i >= 0; --i)
- if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) {
- BUG_ON(new->nr_ptrs >= BCH_REPLICAS_MAX);
+ down_read(&c->alloc_gc_lock);
- new->ptrs[new->nr_ptrs] = old->ptrs[i];
- new->ptr_offset[new->nr_ptrs] = old->ptr_offset[i];
- new->nr_ptrs++;
+ if (dst < src) {
+ spin_lock(&dst->lock);
+ spin_lock_nested(&src->lock, 1);
+ } else {
+ spin_lock(&src->lock);
+ spin_lock_nested(&dst->lock, 1);
+ }
- old->nr_ptrs--;
- memmove(&old->ptrs[i],
- &old->ptrs[i + 1],
- (old->nr_ptrs - i) * sizeof(old->ptrs[0]));
- memmove(&old->ptr_offset[i],
- &old->ptr_offset[i + 1],
- (old->nr_ptrs - i) * sizeof(old->ptr_offset[0]));
+ for (i = src->nr_ptrs - 1; i >= 0; --i) {
+ if (!src->ptrs[i].sectors_free) {
+ /*
+ * Don't do anything: leave the ptr on the old
+ * open_bucket for gc to find
+ */
+ } else if (nr_ptrs_dislike &&
+ !test_bit(src->ptrs[i].ptr.dev, devs->d)) {
+ /*
+ * We don't want this pointer; bch2_open_bucket_put()
+ * will stick it on ca->open_buckets_partial to be
+ * reused
+ */
+ --nr_ptrs_dislike;
+ } else {
+ BUG_ON(dst->nr_ptrs >= ARRAY_SIZE(dst->ptrs));
+
+ dst->ptrs[dst->nr_ptrs++] = src->ptrs[i];
+
+ src->nr_ptrs--;
+ memmove(&src->ptrs[i],
+ &src->ptrs[i + 1],
+ (src->nr_ptrs - i) * sizeof(src->ptrs[0]));
moved_ptr = true;
}
+ }
if (moved_ptr) {
- BUG_ON(old->new_ob);
+ BUG_ON(src->new_ob);
- atomic_inc(&new->pin);
- old->new_ob = new - c->open_buckets;
+ atomic_inc(&dst->pin);
+ src->new_ob = dst - c->open_buckets;
}
+
+ spin_unlock(&dst->lock);
+ spin_unlock(&src->lock);
+ up_read(&c->alloc_gc_lock);
}
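
open_bucket_move_ptrs() takes the two open_bucket locks in address order (spin_lock_nested() on whichever bucket has the higher address), the usual way to rule out an ABBA deadlock when two threads move pointers between the same pair of buckets in opposite directions. The same idiom in a standalone form, with pthread mutexes standing in for the kernel spinlocks:

#include <pthread.h>

struct bucket {
	pthread_mutex_t	lock;
	/* ... */
};

/* Always acquire the lower-addressed lock first: */
static void bucket_lock_pair(struct bucket *a, struct bucket *b)
{
	if (a < b) {
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

static void bucket_unlock_pair(struct bucket *a, struct bucket *b)
{
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}
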
static void verify_not_stale(struct bch_fs *c, const struct open_bucket *ob)
{
#ifdef CONFIG_BCACHEFS_DEBUG
- const struct bch_extent_ptr *ptr;
+ const struct open_bucket_ptr *ptr;
open_bucket_for_each_ptr(ob, ptr) {
- struct bch_dev *ca = c->devs[ptr->dev];
+ struct bch_dev *ca = c->devs[ptr->ptr.dev];
- BUG_ON(ptr_stale(ca, ptr));
+ BUG_ON(ptr_stale(ca, &ptr->ptr));
}
#endif
}
/* Sector allocator */
-static struct open_bucket *lock_writepoint(struct bch_fs *c,
- struct write_point *wp)
-{
- struct open_bucket *ob;
-
- while ((ob = ACCESS_ONCE(wp->b))) {
- mutex_lock(&ob->lock);
- if (wp->b == ob)
- break;
-
- mutex_unlock(&ob->lock);
- }
-
- return ob;
-}
-
static int open_bucket_add_buckets(struct bch_fs *c,
struct write_point *wp,
+ struct bch_devs_mask *_devs,
struct open_bucket *ob,
unsigned nr_replicas,
- unsigned nr_replicas_required,
enum alloc_reserve reserve,
struct closure *cl)
{
struct bch_devs_mask devs = c->rw_devs[wp->type];
- unsigned i;
- int ret;
+ struct open_bucket_ptr *ptr;
if (ob->nr_ptrs >= nr_replicas)
return 0;
+ if (_devs)
+ bitmap_and(devs.d, devs.d, _devs->d, BCH_SB_MEMBERS_MAX);
+
/* Don't allocate from devices we already have pointers to: */
- for (i = 0; i < ob->nr_ptrs; i++)
- __clear_bit(ob->ptrs[i].dev, devs.d);
+ open_bucket_for_each_ptr(ob, ptr)
+ if (ptr->sectors_free)
+ __clear_bit(ptr->ptr.dev, devs.d);
- if (wp->group)
- bitmap_and(devs.d, devs.d, wp->group->d, BCH_SB_MEMBERS_MAX);
+ return bch2_bucket_alloc_set(c, wp, ob, nr_replicas,
+ reserve, &devs, cl);
+}
- ret = bch2_bucket_alloc_set(c, wp, ob, nr_replicas,
- reserve, &devs, cl);
+static struct write_point *__writepoint_find(struct hlist_head *head,
+ unsigned long write_point)
+{
+ struct write_point *wp;
- if (ret == -EROFS &&
- ob->nr_ptrs >= nr_replicas_required)
- ret = 0;
+ hlist_for_each_entry_rcu(wp, head, node) {
+	if (wp->write_point != write_point)
+ continue;
- return ret;
+ mutex_lock(&wp->lock);
+ if (wp->write_point == write_point)
+ return wp;
+ mutex_unlock(&wp->lock);
+ }
+
+ return NULL;
+}
+
+static struct hlist_head *writepoint_hash(struct bch_fs *c,
+ unsigned long write_point)
+{
+ unsigned hash =
+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
+
+ return &c->write_points_hash[hash];
+}
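
writepoint_hash() buckets the identifier with hash_long() over ilog2() of the table size, which assumes write_points_hash has a power-of-two number of heads. A standalone illustration of the same multiplicative-hash bucketing (the constant is the kernel's 64-bit golden-ratio multiplier; the table size here is made up):

#include <stdint.h>
#include <stdio.h>

#define TABLE_BITS 6	/* 64 hash heads, illustrative */

static unsigned bucket_for(unsigned long write_point)
{
	/* 2^64 / golden ratio, as used by the kernel's hash_64()/hash_long(): */
	return (uint64_t) write_point * 0x61C8864680B583EBULL >> (64 - TABLE_BITS);
}

int main(void)
{
	/* nearby identifiers (e.g. adjacent structs) spread across buckets: */
	printf("%u %u %u\n",
	       bucket_for(0x1000), bucket_for(0x1040), bucket_for(0x1080));
	return 0;
}
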
+
+static struct write_point *writepoint_find(struct bch_fs *c,
+ enum bch_data_type data_type,
+ unsigned long write_point)
+{
+ struct write_point *wp, *oldest = NULL;
+ struct hlist_head *head;
+
+ switch (data_type) {
+ case BCH_DATA_BTREE:
+ wp = &c->btree_write_point;
+ mutex_lock(&wp->lock);
+ return wp;
+ case BCH_DATA_USER:
+ break;
+ default:
+ BUG();
+ }
+
+ head = writepoint_hash(c, write_point);
+ wp = __writepoint_find(head, write_point);
+ if (wp)
+ goto out;
+
+ mutex_lock(&c->write_points_hash_lock);
+ wp = __writepoint_find(head, write_point);
+ if (wp)
+ goto out_unlock;
+
+ for (wp = c->write_points;
+ wp < c->write_points + ARRAY_SIZE(c->write_points);
+ wp++)
+ if (!oldest || time_before64(wp->last_used, oldest->last_used))
+ oldest = wp;
+
+ wp = oldest;
+ BUG_ON(!wp);
+
+ mutex_lock(&wp->lock);
+ hlist_del_rcu(&wp->node);
+ wp->write_point = write_point;
+ hlist_add_head_rcu(&wp->node, head);
+out_unlock:
+ mutex_unlock(&c->write_points_hash_lock);
+out:
+ wp->last_used = sched_clock();
+ return wp;
}
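
With this change a write point is looked up by an opaque unsigned long identifier: writepoint_find() hashes it, and on a miss steals the least-recently-used slot from the fixed write_points array. A hypothetical caller, to show how the reworked interface fits together (the owner-pointer identifier, the replica counts, and the pass-through reserve argument are illustrative assumptions, not taken from this patch):

static int example_alloc(struct bch_fs *c, void *owner,
			 enum alloc_reserve reserve, struct closure *cl)
{
	struct write_point *wp;

	/*
	 * The identifier is opaque; using the address of the object doing
	 * the writing keeps unrelated streams on separate write points:
	 */
	wp = bch2_alloc_sectors_start(c, BCH_DATA_USER,
				      NULL,			/* any rw device */
				      (unsigned long) owner,
				      2,			/* nr_replicas */
				      1,			/* nr_replicas_required */
				      reserve, 0, cl);
	if (IS_ERR(wp))
		return PTR_ERR(wp);

	/* ... bch2_alloc_sectors_append_ptrs() against wp->ob ... */

	bch2_alloc_sectors_done(c, wp);
	return 0;
}
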
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
-struct open_bucket *bch2_alloc_sectors_start(struct bch_fs *c,
- struct write_point *wp,
+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
+ enum bch_data_type data_type,
+ struct bch_devs_mask *devs,
+ unsigned long write_point,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
+ unsigned flags,
struct closure *cl)
{
struct open_bucket *ob;
- unsigned open_buckets_reserved = wp == &c->btree_write_point
+ struct write_point *wp;
+ struct open_bucket_ptr *ptr;
+ unsigned open_buckets_reserved = data_type == BCH_DATA_BTREE
? 0 : BTREE_NODE_RESERVE;
+ unsigned nr_ptrs_empty = 0, nr_ptrs_dislike = 0;
int ret;
BUG_ON(!nr_replicas);
-retry:
- ob = lock_writepoint(c, wp);
+
+ wp = writepoint_find(c, data_type, write_point);
+ BUG_ON(wp->type != data_type);
+
+ wp->last_used = sched_clock();
+
+ ob = wp->ob;
+
+ /* does ob have ptrs we don't need? */
+ open_bucket_for_each_ptr(ob, ptr) {
+ if (!ptr->sectors_free)
+ nr_ptrs_empty++;
+ else if (devs && !test_bit(ptr->ptr.dev, devs->d))
+ nr_ptrs_dislike++;
+ }
+
+ ret = open_bucket_add_buckets(c, wp, devs, ob,
+ nr_replicas + nr_ptrs_empty + nr_ptrs_dislike,
+ reserve, cl);
+ if (ret && ret != -EROFS)
+ goto err;
+
+ if (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
+ goto alloc_done;
+
+ /*
+ * XXX:
+	 * Should this allocation be _forced_ to use the specified device (e.g.
+ * internal migration), or should we fall back to allocating from all
+ * devices?
+ */
+ ret = open_bucket_add_buckets(c, wp, NULL, ob,
+ nr_replicas + nr_ptrs_empty,
+ reserve, cl);
+ if (ret && ret != -EROFS)
+ goto err;
+alloc_done:
+ if (ob->nr_ptrs - nr_ptrs_empty -
+ ((flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) ? nr_ptrs_dislike : 0)
+ < nr_replicas_required) {
+ ret = -EROFS;
+ goto err;
+ }
/*
* If ob->sectors_free == 0, one or more of the buckets ob points to is
@@ -1456,53 +1597,34 @@ retry:
* still needs to find them; instead, we must allocate a new open bucket
* and copy any pointers to non-full buckets into the new open bucket.
*/
- if (!ob || ob->has_full_ptrs) {
- struct open_bucket *new_ob;
-
- new_ob = bch2_open_bucket_get(c, open_buckets_reserved, cl);
- if (IS_ERR(new_ob))
- return new_ob;
-
- mutex_lock(&new_ob->lock);
-
- /*
- * We point the write point at the open_bucket before doing the
- * allocation to avoid a race with shutdown:
- */
- if (race_fault() ||
- cmpxchg(&wp->b, ob, new_ob) != ob) {
- /* We raced: */
- mutex_unlock(&new_ob->lock);
- bch2_open_bucket_put(c, new_ob);
-
- if (ob)
- mutex_unlock(&ob->lock);
- goto retry;
+ BUG_ON(ob->nr_ptrs - nr_ptrs_empty - nr_replicas > nr_ptrs_dislike);
+ nr_ptrs_dislike = ob->nr_ptrs - nr_ptrs_empty - nr_replicas;
+
+ if (nr_ptrs_empty || nr_ptrs_dislike) {
+ ob = bch2_open_bucket_get(c, open_buckets_reserved, cl);
+ if (IS_ERR(ob)) {
+ ret = PTR_ERR(ob);
+ goto err;
}
- if (ob) {
- open_bucket_copy_unused_ptrs(c, new_ob, ob);
- mutex_unlock(&ob->lock);
- bch2_open_bucket_put(c, ob);
- }
+ /* Remove pointers we don't want to use: */
- ob = new_ob;
+ open_bucket_move_ptrs(c, ob, wp->ob, devs, nr_ptrs_dislike);
+ bch2_open_bucket_put(c, wp->ob);
+ wp->ob = ob;
}
- ret = open_bucket_add_buckets(c, wp, ob, nr_replicas,
- nr_replicas_required,
- reserve, cl);
- if (ret) {
- mutex_unlock(&ob->lock);
- return ERR_PTR(ret);
- }
+ BUG_ON(ob->nr_ptrs < nr_replicas_required);
- ob->sectors_free = open_bucket_sectors_free(c, ob, nr_replicas);
+ wp->sectors_free = open_bucket_sectors_free(c, ob, nr_replicas);
- BUG_ON(!ob->sectors_free);
+ BUG_ON(!wp->sectors_free);
verify_not_stale(c, ob);
- return ob;
+ return wp;
+err:
+ mutex_unlock(&wp->lock);
+ return ERR_PTR(ret);
}
/*
@@ -1514,29 +1636,26 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e,
unsigned sectors)
{
struct bch_extent_ptr tmp;
- bool has_data = false;
- unsigned i;
+ struct open_bucket_ptr *ptr;
/*
* We're keeping any existing pointer k has, and appending new pointers:
* __bch2_write() will only write to the pointers we add here:
*/
- BUG_ON(sectors > ob->sectors_free);
-
- /* didn't use all the ptrs: */
- if (nr_replicas < ob->nr_ptrs)
- has_data = true;
+ for (ptr = ob->ptrs;
+ ptr < ob->ptrs + min_t(u8, ob->nr_ptrs, nr_replicas); ptr++) {
+ struct bch_dev *ca = c->devs[ptr->ptr.dev];
- for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) {
- EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
+ EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ptr->ptr.dev));
- tmp = ob->ptrs[i];
+ tmp = ptr->ptr;
tmp.cached = bkey_extent_is_cached(&e->k);
- tmp.offset += ob->ptr_offset[i];
+ tmp.offset += ca->mi.bucket_size - ptr->sectors_free;
extent_ptr_append(e, tmp);
- ob->ptr_offset[i] += sectors;
+ BUG_ON(sectors > ptr->sectors_free);
+ ptr->sectors_free -= sectors;
}
}
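
The offset arithmetic above replaces the old per-pointer ptr_offset[] array: the write position inside a bucket is now derived from how much of it is still free, and sectors_free is decremented as space is handed out. A quick worked example with made-up numbers:

#include <assert.h>

int main(void)
{
	unsigned bucket_size  = 1024;	/* ca->mi.bucket_size, in sectors */
	unsigned sectors_free = 768;	/* ptr->sectors_free */
	unsigned write_size   = 128;	/* sectors being appended */

	/* the write starts where the previous ones left off: */
	unsigned offset_in_bucket = bucket_size - sectors_free;
	assert(offset_in_bucket == 256);

	/* and consumes part of what's left: */
	sectors_free -= write_size;
	assert(sectors_free == 640);
	return 0;
}
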
@@ -1544,25 +1663,27 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e,
* Append pointers to the space we just allocated to @k, and mark @sectors space
* as allocated out of @ob
*/
-void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp,
- struct open_bucket *ob)
+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
{
- bool has_data = false;
- unsigned i;
+ struct open_bucket *ob = wp->ob, *new_ob = NULL;
+ struct open_bucket_ptr *ptr;
+ bool empty = false;
- for (i = 0; i < ob->nr_ptrs; i++) {
- if (!ob_ptr_sectors_free(c, ob, &ob->ptrs[i]))
- ob->has_full_ptrs = true;
- else
- has_data = true;
- }
+ open_bucket_for_each_ptr(ob, ptr)
+ empty |= !ptr->sectors_free;
+
+ if (empty)
+ new_ob = bch2_open_bucket_get(c, 0, NULL);
- if (likely(has_data))
+ if (!IS_ERR_OR_NULL(new_ob)) {
+ /* writepoint's ref becomes our ref: */
+ wp->ob = new_ob;
+ open_bucket_move_ptrs(c, new_ob, ob, 0, 0);
+ } else {
atomic_inc(&ob->pin);
- else
- BUG_ON(xchg(&wp->b, NULL) != ob);
+ }
- mutex_unlock(&ob->lock);
+ mutex_unlock(&wp->lock);
}
/*
@@ -1583,27 +1704,33 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp,
* @cl - closure to wait for a bucket
*/
struct open_bucket *bch2_alloc_sectors(struct bch_fs *c,
- struct write_point *wp,
+ enum bch_data_type data_type,
+ struct bch_devs_mask *devs,
+ unsigned long write_point,
struct bkey_i_extent *e,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
+ unsigned flags,
struct closure *cl)
{
+ struct write_point *wp;
struct open_bucket *ob;
- ob = bch2_alloc_sectors_start(c, wp, nr_replicas,
- nr_replicas_required,
- reserve, cl);
- if (IS_ERR_OR_NULL(ob))
- return ob;
+ wp = bch2_alloc_sectors_start(c, data_type, devs, write_point,
+ nr_replicas, nr_replicas_required,
+ reserve, flags, cl);
+ if (IS_ERR_OR_NULL(wp))
+ return ERR_CAST(wp);
- if (e->k.size > ob->sectors_free)
- bch2_key_resize(&e->k, ob->sectors_free);
+ ob = wp->ob;
+
+ if (e->k.size > wp->sectors_free)
+ bch2_key_resize(&e->k, wp->sectors_free);
bch2_alloc_sectors_append_ptrs(c, e, nr_replicas, ob, e->k.size);
- bch2_alloc_sectors_done(c, wp, ob);
+ bch2_alloc_sectors_done(c, wp);
return ob;
}
@@ -1640,8 +1767,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
}
c->fastest_tier = fastest_tier != slowest_tier ? fastest_tier : NULL;
-
- c->promote_write_point.group = &fastest_tier->devs;
+ c->fastest_devs = fastest_tier != slowest_tier ? &fastest_tier->devs : NULL;
if (!fastest_tier)
goto set_capacity;
@@ -1713,49 +1839,61 @@ set_capacity:
closure_wake_up(&c->freelist_wait);
}
+static bool open_bucket_has_device(struct open_bucket *ob,
+ struct bch_dev *ca)
+{
+ struct open_bucket_ptr *ptr;
+ bool ret = false;
+
+ spin_lock(&ob->lock);
+ open_bucket_for_each_ptr(ob, ptr)
+ ret |= ptr->ptr.dev == ca->dev_idx;
+ spin_unlock(&ob->lock);
+
+ return ret;
+}
+
static void bch2_stop_write_point(struct bch_fs *c, struct bch_dev *ca,
struct write_point *wp)
{
struct open_bucket *ob;
- struct bch_extent_ptr *ptr;
+ struct closure cl;
- ob = lock_writepoint(c, wp);
- if (!ob)
+ closure_init_stack(&cl);
+retry:
+ mutex_lock(&wp->lock);
+ if (!open_bucket_has_device(wp->ob, ca)) {
+ mutex_unlock(&wp->lock);
return;
+ }
- for (ptr = ob->ptrs; ptr < ob->ptrs + ob->nr_ptrs; ptr++)
- if (ptr->dev == ca->dev_idx)
- goto found;
+ ob = bch2_open_bucket_get(c, 0, &cl);
+ if (IS_ERR(ob)) {
+ mutex_unlock(&wp->lock);
+ closure_sync(&cl);
+ goto retry;
+	}
- mutex_unlock(&ob->lock);
- return;
-found:
- BUG_ON(xchg(&wp->b, NULL) != ob);
- mutex_unlock(&ob->lock);
+ open_bucket_move_ptrs(c, ob, wp->ob, &ca->self, ob->nr_ptrs);
+ bch2_open_bucket_put(c, wp->ob);
+ wp->ob = ob;
- /* Drop writepoint's ref: */
- bch2_open_bucket_put(c, ob);
+ mutex_unlock(&wp->lock);
}
static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
- struct bch_extent_ptr *ptr;
struct open_bucket *ob;
+ bool ret = false;
for (ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++)
- if (atomic_read(&ob->pin)) {
- mutex_lock(&ob->lock);
- for (ptr = ob->ptrs; ptr < ob->ptrs + ob->nr_ptrs; ptr++)
- if (ptr->dev == ca->dev_idx) {
- mutex_unlock(&ob->lock);
- return true;
- }
- mutex_unlock(&ob->lock);
- }
+ if (atomic_read(&ob->pin))
+ ret |= open_bucket_has_device(ob, ca);
- return false;
+ return ret;
}
/* device goes ro: */
@@ -1782,11 +1920,6 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
/* Next, close write points that point to this device... */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_stop_write_point(c, ca, &c->write_points[i]);
-
- bch2_stop_write_point(c, ca, &ca->copygc_write_point);
- bch2_stop_write_point(c, ca, &c->promote_write_point);
- bch2_stop_write_point(c, ca, &c->tiers[ca->mi.tier].wp);
- bch2_stop_write_point(c, ca, &c->migration_write_point);
bch2_stop_write_point(c, ca, &c->btree_write_point);
mutex_lock(&c->btree_reserve_cache_lock);
@@ -1880,35 +2013,44 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
void bch2_fs_allocator_init(struct bch_fs *c)
{
- unsigned i;
+ struct open_bucket *ob;
+ struct write_point *wp;
- INIT_LIST_HEAD(&c->open_buckets_open);
- INIT_LIST_HEAD(&c->open_buckets_free);
+ mutex_init(&c->write_points_hash_lock);
+ init_rwsem(&c->alloc_gc_lock);
spin_lock_init(&c->open_buckets_lock);
bch2_prio_timer_init(c, READ);
bch2_prio_timer_init(c, WRITE);
/* open bucket 0 is a sentinel NULL: */
- mutex_init(&c->open_buckets[0].lock);
- INIT_LIST_HEAD(&c->open_buckets[0].list);
+ spin_lock_init(&c->open_buckets[0].lock);
- for (i = 1; i < ARRAY_SIZE(c->open_buckets); i++) {
- mutex_init(&c->open_buckets[i].lock);
+ for (ob = c->open_buckets + 1;
+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
+ spin_lock_init(&ob->lock);
c->open_buckets_nr_free++;
- list_add(&c->open_buckets[i].list, &c->open_buckets_free);
+
+ ob->freelist = c->open_buckets_freelist;
+ c->open_buckets_freelist = ob - c->open_buckets;
}
- c->journal.wp.type = BCH_DATA_JOURNAL;
+ mutex_init(&c->btree_write_point.lock);
c->btree_write_point.type = BCH_DATA_BTREE;
+ c->btree_write_point.ob = bch2_open_bucket_get(c, 0, NULL);
+ BUG_ON(IS_ERR(c->btree_write_point.ob));
- for (i = 0; i < ARRAY_SIZE(c->tiers); i++)
- c->tiers[i].wp.type = BCH_DATA_USER;
+ for (wp = c->write_points;
+ wp < c->write_points + ARRAY_SIZE(c->write_points); wp++) {
+ mutex_init(&wp->lock);
+ wp->type = BCH_DATA_USER;
+ wp->ob = bch2_open_bucket_get(c, 0, NULL);
+ wp->last_used = sched_clock();
- for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
- c->write_points[i].type = BCH_DATA_USER;
+ wp->write_point = (unsigned long) wp;
+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
- c->promote_write_point.type = BCH_DATA_USER;
- c->migration_write_point.type = BCH_DATA_USER;
+ BUG_ON(IS_ERR(wp->ob));
+ }
c->pd_controllers_update_seconds = 5;
INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);