author    Kent Overstreet <kent.overstreet@gmail.com>  2022-03-13 19:21:13 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>  2022-03-13 19:21:13 -0400
commit    3765483ff0cf9abd0243fcafe11aebd0f9beb03d (patch)
tree      232ba4c4c17b2c2579782b01422ae68994c9b5bf /libbcachefs/buckets.c
parent    d34e731082d8fcd710c2af6377a3b7fa927c8451 (diff)
Update bcachefs sources to f05b3c1af9 bcachefs: Improve bucket_alloc_fail tracepoint (tag: v0.19)
Diffstat (limited to 'libbcachefs/buckets.c')
-rw-r--r--  libbcachefs/buckets.c | 315
1 file changed, 128 insertions(+), 187 deletions(-)
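
The hunks below drop struct bucket_mark and its lockless bucket_cmpxchg() loops in favour of plain fields on struct bucket, updated under a per-bucket lock and accounted via the new bch2_dev_usage_update_m() helper. As a rough sketch only (the function name here is made up; real callers such as bch2_mark_metadata_bucket() additionally check for overflow, handle errors, and run with c->mark_lock held for read), the pattern the patch converges on looks like this:

	/*
	 * Illustrative sketch, not part of the patch: take the per-bucket lock,
	 * snapshot the old state, mutate the bucket fields directly, snapshot
	 * the new state, then account the old -> new delta.
	 */
	static void sketch_mark_bucket(struct bch_fs *c, struct bch_dev *ca,
				       struct bucket *g, enum bch_data_type data_type,
				       unsigned sectors, u64 journal_seq)
	{
		struct bucket old, new;

		bucket_lock(g);
		old = *g;			/* state before the update */

		g->data_type	  = data_type;
		g->dirty_sectors += sectors;	/* sector counts are now u32, not u16 */

		new = *g;			/* state after the update */
		bucket_unlock(g);

		/* account the delta against the per-device usage counters */
		bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
	}
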
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index d5226375..fbce6cdf 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -279,29 +279,24 @@ bch2_fs_usage_read_short(struct bch_fs *c)
return ret;
}
-static inline int is_unavailable_bucket(struct bucket_mark m)
+static inline int is_unavailable_bucket(struct bkey_alloc_unpacked a)
{
- return !is_available_bucket(m);
+ return a.dirty_sectors || a.stripe;
}
static inline int bucket_sectors_fragmented(struct bch_dev *ca,
- struct bucket_mark m)
+ struct bkey_alloc_unpacked a)
{
- return m.dirty_sectors
- ? max(0, (int) ca->mi.bucket_size - (int) m.dirty_sectors)
+ return a.dirty_sectors
+ ? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
: 0;
}
-static inline int is_stripe_data_bucket(struct bucket_mark m)
+static inline enum bch_data_type bucket_type(struct bkey_alloc_unpacked a)
{
- return m.stripe && m.data_type != BCH_DATA_parity;
-}
-
-static inline enum bch_data_type bucket_type(struct bucket_mark m)
-{
- return m.cached_sectors && !m.dirty_sectors
+ return a.cached_sectors && !a.dirty_sectors
? BCH_DATA_cached
- : m.data_type;
+ : a.data_type;
}
static inline void account_bucket(struct bch_fs_usage *fs_usage,
@@ -316,7 +311,8 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
}
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
- struct bucket_mark old, struct bucket_mark new,
+ struct bkey_alloc_unpacked old,
+ struct bkey_alloc_unpacked new,
u64 journal_seq, bool gc)
{
struct bch_fs_usage *fs_usage;
@@ -347,9 +343,28 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
preempt_enable();
+}
+
+static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket old, struct bucket new,
+ u64 journal_seq, bool gc)
+{
+ struct bkey_alloc_unpacked old_a = {
+ .gen = old.gen,
+ .data_type = old.data_type,
+ .dirty_sectors = old.dirty_sectors,
+ .cached_sectors = old.cached_sectors,
+ .stripe = old.stripe,
+ };
+ struct bkey_alloc_unpacked new_a = {
+ .gen = new.gen,
+ .data_type = new.data_type,
+ .dirty_sectors = new.dirty_sectors,
+ .cached_sectors = new.cached_sectors,
+ .stripe = new.stripe,
+ };
- if (!is_available_bucket(old) && is_available_bucket(new))
- bch2_wake_allocator(ca);
+ bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
}
static inline int __update_replicas(struct bch_fs *c,
@@ -484,19 +499,6 @@ static inline void update_cached_sectors_list(struct btree_trans *trans,
update_replicas_list(trans, &r.e, sectors);
}
-void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
- size_t b, bool owned_by_allocator)
-{
- struct bucket *g = bucket(ca, b);
- struct bucket_mark old, new;
-
- old = bucket_cmpxchg(g, new, ({
- new.owned_by_allocator = owned_by_allocator;
- }));
-
- BUG_ON(owned_by_allocator == old.owned_by_allocator);
-}
-
int bch2_mark_alloc(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
@@ -507,8 +509,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old);
struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new);
struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
- struct bucket *g;
- struct bucket_mark old_m, m;
int ret = 0;
if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket ||
@@ -555,28 +555,46 @@ int bch2_mark_alloc(struct btree_trans *trans,
}
}
+ if (!new_u.data_type &&
+ (!new_u.journal_seq || new_u.journal_seq < c->journal.flushed_seq_ondisk))
+ closure_wake_up(&c->freelist_wait);
+
+ if ((flags & BTREE_TRIGGER_INSERT) &&
+ new_u.need_discard &&
+ !new_u.journal_seq)
+ bch2_do_discards(c);
+
+ if (!old_u.data_type &&
+ new_u.data_type &&
+ should_invalidate_buckets(ca))
+ bch2_do_invalidates(c);
+
+ if (bucket_state(new_u) == BUCKET_need_gc_gens) {
+ atomic_inc(&c->kick_gc);
+ wake_up_process(c->gc_thread);
+ }
+
percpu_down_read(&c->mark_lock);
if (!gc && new_u.gen != old_u.gen)
*bucket_gen(ca, new_u.bucket) = new_u.gen;
- g = __bucket(ca, new_u.bucket, gc);
+ bch2_dev_usage_update(c, ca, old_u, new_u, journal_seq, gc);
+
+ if (gc) {
+ struct bucket *g = gc_bucket(ca, new_u.bucket);
- old_m = bucket_cmpxchg(g, m, ({
- m.gen = new_u.gen;
- m.data_type = new_u.data_type;
- m.dirty_sectors = new_u.dirty_sectors;
- m.cached_sectors = new_u.cached_sectors;
- m.stripe = new_u.stripe != 0;
- }));
+ bucket_lock(g);
- bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc);
+ g->gen_valid = 1;
+ g->gen = new_u.gen;
+ g->data_type = new_u.data_type;
+ g->stripe = new_u.stripe;
+ g->stripe_redundancy = new_u.stripe_redundancy;
+ g->dirty_sectors = new_u.dirty_sectors;
+ g->cached_sectors = new_u.cached_sectors;
- g->io_time[READ] = new_u.read_time;
- g->io_time[WRITE] = new_u.write_time;
- g->oldest_gen = new_u.oldest_gen;
- g->gen_valid = 1;
- g->stripe = new_u.stripe;
- g->stripe_redundancy = new_u.stripe_redundancy;
+ bucket_unlock(g);
+ }
percpu_up_read(&c->mark_lock);
/*
@@ -585,9 +603,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
*/
if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
- old_m.cached_sectors) {
+ old_u.cached_sectors) {
ret = update_cached_sectors(c, new, ca->dev_idx,
- -old_m.cached_sectors,
+ -old_u.cached_sectors,
journal_seq, gc);
if (ret) {
bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
@@ -595,29 +613,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
}
trace_invalidate(ca, bucket_to_sector(ca, new_u.bucket),
- old_m.cached_sectors);
+ old_u.cached_sectors);
}
return 0;
}
-#define checked_add(a, b) \
-({ \
- unsigned _res = (unsigned) (a) + (b); \
- bool overflow = _res > U16_MAX; \
- if (overflow) \
- _res = U16_MAX; \
- (a) = _res; \
- overflow; \
-})
-
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type data_type,
unsigned sectors, struct gc_pos pos,
unsigned flags)
{
- struct bucket *g;
- struct bucket_mark old, new;
+ struct bucket old, new, *g;
bool overflow;
BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -632,10 +639,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
percpu_down_read(&c->mark_lock);
g = gc_bucket(ca, b);
- old = bucket_cmpxchg(g, new, ({
- new.data_type = data_type;
- overflow = checked_add(new.dirty_sectors, sectors);
- }));
+
+ bucket_lock(g);
+ old = *g;
+
+ g->data_type = data_type;
+ g->dirty_sectors += sectors;
+ overflow = g->dirty_sectors < sectors;
+
+ new = *g;
+ bucket_unlock(g);
bch2_fs_inconsistent_on(old.data_type &&
old.data_type != data_type, c,
@@ -649,7 +662,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
bch2_data_types[old.data_type ?: data_type],
old.dirty_sectors, sectors);
- bch2_dev_usage_update(c, ca, old, new, 0, true);
+ bch2_dev_usage_update_m(c, ca, old, new, 0, true);
percpu_up_read(&c->mark_lock);
}
@@ -669,7 +682,7 @@ static int check_bucket_ref(struct bch_fs *c,
const struct bch_extent_ptr *ptr,
s64 sectors, enum bch_data_type ptr_data_type,
u8 b_gen, u8 bucket_data_type,
- u16 dirty_sectors, u16 cached_sectors)
+ u32 dirty_sectors, u32 cached_sectors)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
@@ -737,7 +750,7 @@ static int check_bucket_ref(struct bch_fs *c,
goto err;
}
- if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
+ if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
"while marking %s",
@@ -768,8 +781,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g;
- struct bucket_mark new, old;
+ struct bucket old, new, *g;
struct printbuf buf = PRINTBUF;
int ret = 0;
@@ -781,34 +793,38 @@ static int mark_stripe_bucket(struct btree_trans *trans,
buf.atomic++;
g = PTR_GC_BUCKET(ca, ptr);
- if (g->mark.dirty_sectors ||
+ if (g->dirty_sectors ||
(g->stripe && g->stripe != k.k->p.offset)) {
bch2_fs_inconsistent(c,
"bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
- ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
+ ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EINVAL;
goto err;
}
- old = bucket_cmpxchg(g, new, ({
- ret = check_bucket_ref(c, k, ptr, sectors, data_type,
- new.gen, new.data_type,
- new.dirty_sectors, new.cached_sectors);
- if (ret)
- goto err;
+ bucket_lock(g);
+ old = *g;
- new.dirty_sectors += sectors;
- if (data_type)
- new.data_type = data_type;
+ ret = check_bucket_ref(c, k, ptr, sectors, data_type,
+ new.gen, new.data_type,
+ new.dirty_sectors, new.cached_sectors);
+ if (ret) {
+ bucket_unlock(g);
+ goto err;
+ }
- new.stripe = true;
- }));
+ new.dirty_sectors += sectors;
+ if (data_type)
+ new.data_type = data_type;
g->stripe = k.k->p.offset;
g->stripe_redundancy = s->nr_redundant;
- bch2_dev_usage_update(c, ca, old, new, journal_seq, true);
+ new = *g;
+ bucket_unlock(g);
+
+ bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
err:
percpu_up_read(&c->mark_lock);
printbuf_exit(&buf);
@@ -820,9 +836,9 @@ static int __mark_pointer(struct btree_trans *trans,
const struct bch_extent_ptr *ptr,
s64 sectors, enum bch_data_type ptr_data_type,
u8 bucket_gen, u8 *bucket_data_type,
- u16 *dirty_sectors, u16 *cached_sectors)
+ u32 *dirty_sectors, u32 *cached_sectors)
{
- u16 *dst_sectors = !ptr->cached
+ u32 *dst_sectors = !ptr->cached
? dirty_sectors
: cached_sectors;
int ret = check_bucket_ref(trans->c, k, ptr, sectors, ptr_data_type,
@@ -846,11 +862,9 @@ static int bch2_mark_pointer(struct btree_trans *trans,
{
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
- struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- struct bucket *g;
+ struct bucket old, new, *g;
u8 bucket_data_type;
- u64 v;
int ret = 0;
BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -858,30 +872,27 @@ static int bch2_mark_pointer(struct btree_trans *trans,
percpu_down_read(&c->mark_lock);
g = PTR_GC_BUCKET(ca, &p.ptr);
- v = atomic64_read(&g->_mark.v);
- do {
- new.v.counter = old.v.counter = v;
- bucket_data_type = new.data_type;
-
- ret = __mark_pointer(trans, k, &p.ptr, sectors,
- data_type, new.gen,
- &bucket_data_type,
- &new.dirty_sectors,
- &new.cached_sectors);
- if (ret)
- goto err;
+ bucket_lock(g);
+ old = *g;
- new.data_type = bucket_data_type;
+ bucket_data_type = g->data_type;
- if (flags & BTREE_TRIGGER_NOATOMIC) {
- g->_mark = new;
- break;
- }
- } while ((v = atomic64_cmpxchg(&g->_mark.v,
- old.v.counter,
- new.v.counter)) != old.v.counter);
+ ret = __mark_pointer(trans, k, &p.ptr, sectors,
+ data_type, g->gen,
+ &bucket_data_type,
+ &g->dirty_sectors,
+ &g->cached_sectors);
+ if (ret) {
+ bucket_unlock(g);
+ goto err;
+ }
+
+ g->data_type = bucket_data_type;
+
+ new = *g;
+ bucket_unlock(g);
- bch2_dev_usage_update(c, ca, old, new, journal_seq, true);
+ bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
err:
percpu_up_read(&c->mark_lock);
@@ -2041,16 +2052,6 @@ recalculate:
/* Startup/shutdown: */
-static void buckets_free_rcu(struct rcu_head *rcu)
-{
- struct bucket_array *buckets =
- container_of(rcu, struct bucket_array, rcu);
-
- kvpfree(buckets,
- sizeof(*buckets) +
- buckets->nbuckets * sizeof(struct bucket));
-}
-
static void bucket_gens_free_rcu(struct rcu_head *rcu)
{
struct bucket_gens *buckets =
@@ -2061,46 +2062,19 @@ static void bucket_gens_free_rcu(struct rcu_head *rcu)
int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
- struct bucket_array *buckets = NULL, *old_buckets = NULL;
struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
unsigned long *buckets_nouse = NULL;
- alloc_fifo free[RESERVE_NR];
- alloc_fifo free_inc;
- alloc_heap alloc_heap;
-
- size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
- ca->mi.bucket_size / btree_sectors(c));
- /* XXX: these should be tunable */
- size_t reserve_none = max_t(size_t, 1, nbuckets >> 9);
- size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 6);
- size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
- btree_reserve * 2);
- bool resize = ca->buckets[0] != NULL;
+ bool resize = ca->bucket_gens != NULL;
int ret = -ENOMEM;
- unsigned i;
- memset(&free, 0, sizeof(free));
- memset(&free_inc, 0, sizeof(free_inc));
- memset(&alloc_heap, 0, sizeof(alloc_heap));
-
- if (!(buckets = kvpmalloc(sizeof(struct bucket_array) +
- nbuckets * sizeof(struct bucket),
- GFP_KERNEL|__GFP_ZERO)) ||
- !(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
+ if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
GFP_KERNEL|__GFP_ZERO)) ||
(c->opts.buckets_nouse &&
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
- GFP_KERNEL|__GFP_ZERO))) ||
- !init_fifo(&free[RESERVE_MOVINGGC],
- copygc_reserve, GFP_KERNEL) ||
- !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
- !init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) ||
- !init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL))
+ GFP_KERNEL|__GFP_ZERO))))
goto err;
- buckets->first_bucket = ca->mi.first_bucket;
- buckets->nbuckets = nbuckets;
bucket_gens->first_bucket = ca->mi.first_bucket;
bucket_gens->nbuckets = nbuckets;
@@ -2112,15 +2086,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
percpu_down_write(&c->mark_lock);
}
- old_buckets = bucket_array(ca);
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
if (resize) {
- size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
+ size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);
- memcpy(buckets->b,
- old_buckets->b,
- n * sizeof(struct bucket));
memcpy(bucket_gens->b,
old_bucket_gens->b,
n);
@@ -2130,47 +2100,25 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
BITS_TO_LONGS(n) * sizeof(unsigned long));
}
- rcu_assign_pointer(ca->buckets[0], buckets);
rcu_assign_pointer(ca->bucket_gens, bucket_gens);
- buckets = old_buckets;
bucket_gens = old_bucket_gens;
swap(ca->buckets_nouse, buckets_nouse);
+ nbuckets = ca->mi.nbuckets;
+
if (resize) {
percpu_up_write(&c->mark_lock);
+ up_write(&ca->bucket_lock);
up_write(&c->gc_lock);
}
- spin_lock(&c->freelist_lock);
- for (i = 0; i < RESERVE_NR; i++) {
- fifo_move(&free[i], &ca->free[i]);
- swap(ca->free[i], free[i]);
- }
- fifo_move(&free_inc, &ca->free_inc);
- swap(ca->free_inc, free_inc);
- spin_unlock(&c->freelist_lock);
-
- /* with gc lock held, alloc_heap can't be in use: */
- swap(ca->alloc_heap, alloc_heap);
-
- nbuckets = ca->mi.nbuckets;
-
- if (resize)
- up_write(&ca->bucket_lock);
-
ret = 0;
err:
- free_heap(&alloc_heap);
- free_fifo(&free_inc);
- for (i = 0; i < RESERVE_NR; i++)
- free_fifo(&free[i]);
kvpfree(buckets_nouse,
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
if (bucket_gens)
call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
- if (buckets)
- call_rcu(&buckets->rcu, buckets_free_rcu);
return ret;
}
@@ -2179,17 +2127,10 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
{
unsigned i;
- free_heap(&ca->alloc_heap);
- free_fifo(&ca->free_inc);
- for (i = 0; i < RESERVE_NR; i++)
- free_fifo(&ca->free[i]);
kvpfree(ca->buckets_nouse,
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
sizeof(struct bucket_gens) + ca->mi.nbuckets);
- kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
- sizeof(struct bucket_array) +
- ca->mi.nbuckets * sizeof(struct bucket));
for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
free_percpu(ca->usage[i]);