author		Kent Overstreet <kent.overstreet@gmail.com>	2022-03-13 19:21:13 -0400
committer	Kent Overstreet <kent.overstreet@gmail.com>	2022-03-13 19:21:13 -0400
commit		3765483ff0cf9abd0243fcafe11aebd0f9beb03d (patch)
tree		232ba4c4c17b2c2579782b01422ae68994c9b5bf /libbcachefs/buckets.c
parent		d34e731082d8fcd710c2af6377a3b7fa927c8451 (diff)
Update bcachefs sources to f05b3c1af9 bcachefs: Improve bucket_alloc_fail tracepoint (v0.19)
Diffstat (limited to 'libbcachefs/buckets.c')
-rw-r--r--	libbcachefs/buckets.c	315
1 file changed, 128 insertions(+), 187 deletions(-)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index d5226375..fbce6cdf 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -279,29 +279,24 @@ bch2_fs_usage_read_short(struct bch_fs *c)
 	return ret;
 }
 
-static inline int is_unavailable_bucket(struct bucket_mark m)
+static inline int is_unavailable_bucket(struct bkey_alloc_unpacked a)
 {
-	return !is_available_bucket(m);
+	return a.dirty_sectors || a.stripe;
 }
 
 static inline int bucket_sectors_fragmented(struct bch_dev *ca,
-					    struct bucket_mark m)
+					    struct bkey_alloc_unpacked a)
 {
-	return m.dirty_sectors
-		? max(0, (int) ca->mi.bucket_size - (int) m.dirty_sectors)
+	return a.dirty_sectors
+		? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
 		: 0;
 }
 
-static inline int is_stripe_data_bucket(struct bucket_mark m)
+static inline enum bch_data_type bucket_type(struct bkey_alloc_unpacked a)
 {
-	return m.stripe && m.data_type != BCH_DATA_parity;
-}
-
-static inline enum bch_data_type bucket_type(struct bucket_mark m)
-{
-	return m.cached_sectors && !m.dirty_sectors
+	return a.cached_sectors && !a.dirty_sectors
 		? BCH_DATA_cached
-		: m.data_type;
+		: a.data_type;
 }
 
 static inline void account_bucket(struct bch_fs_usage *fs_usage,
@@ -316,7 +311,8 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
 }
 
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
-				  struct bucket_mark old, struct bucket_mark new,
+				  struct bkey_alloc_unpacked old,
+				  struct bkey_alloc_unpacked new,
 				  u64 journal_seq, bool gc)
 {
 	struct bch_fs_usage *fs_usage;
@@ -347,9 +343,28 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 	u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
 
 	preempt_enable();
+}
+
+static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
+				    struct bucket old, struct bucket new,
+				    u64 journal_seq, bool gc)
+{
+	struct bkey_alloc_unpacked old_a = {
+		.gen		= old.gen,
+		.data_type	= old.data_type,
+		.dirty_sectors	= old.dirty_sectors,
+		.cached_sectors	= old.cached_sectors,
+		.stripe		= old.stripe,
+	};
+	struct bkey_alloc_unpacked new_a = {
+		.gen		= new.gen,
+		.data_type	= new.data_type,
+		.dirty_sectors	= new.dirty_sectors,
+		.cached_sectors	= new.cached_sectors,
+		.stripe		= new.stripe,
+	};
 
-	if (!is_available_bucket(old) && is_available_bucket(new))
-		bch2_wake_allocator(ca);
+	bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
 }
 
 static inline int __update_replicas(struct bch_fs *c,
@@ -484,19 +499,6 @@ static inline void update_cached_sectors_list(struct btree_trans *trans,
 	update_replicas_list(trans, &r.e, sectors);
 }
 
-void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
-			    size_t b, bool owned_by_allocator)
-{
-	struct bucket *g = bucket(ca, b);
-	struct bucket_mark old, new;
-
-	old = bucket_cmpxchg(g, new, ({
-		new.owned_by_allocator = owned_by_allocator;
-	}));
-
-	BUG_ON(owned_by_allocator == old.owned_by_allocator);
-}
-
 int bch2_mark_alloc(struct btree_trans *trans,
 		    struct bkey_s_c old, struct bkey_s_c new,
 		    unsigned flags)
@@ -507,8 +509,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old);
 	struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new);
 	struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
-	struct bucket *g;
-	struct bucket_mark old_m, m;
 	int ret = 0;
 
 	if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket ||
@@ -555,28 +555,46 @@ int bch2_mark_alloc(struct btree_trans *trans,
 		}
 	}
 
+	if (!new_u.data_type &&
+	    (!new_u.journal_seq ||
+	     new_u.journal_seq < c->journal.flushed_seq_ondisk))
+		closure_wake_up(&c->freelist_wait);
+
+	if ((flags & BTREE_TRIGGER_INSERT) &&
+	    new_u.need_discard &&
+	    !new_u.journal_seq)
+		bch2_do_discards(c);
+
+	if (!old_u.data_type &&
+	    new_u.data_type &&
+	    should_invalidate_buckets(ca))
+		bch2_do_invalidates(c);
+
+	if (bucket_state(new_u) == BUCKET_need_gc_gens) {
+		atomic_inc(&c->kick_gc);
+		wake_up_process(c->gc_thread);
+	}
+
 	percpu_down_read(&c->mark_lock);
 	if (!gc && new_u.gen != old_u.gen)
 		*bucket_gen(ca, new_u.bucket) = new_u.gen;
 
-	g = __bucket(ca, new_u.bucket, gc);
+	bch2_dev_usage_update(c, ca, old_u, new_u, journal_seq, gc);
+
+	if (gc) {
+		struct bucket *g = gc_bucket(ca, new_u.bucket);
 
-	old_m = bucket_cmpxchg(g, m, ({
-		m.gen			= new_u.gen;
-		m.data_type		= new_u.data_type;
-		m.dirty_sectors		= new_u.dirty_sectors;
-		m.cached_sectors	= new_u.cached_sectors;
-		m.stripe		= new_u.stripe != 0;
-	}));
+		bucket_lock(g);
 
-	bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc);
+		g->gen_valid		= 1;
+		g->gen			= new_u.gen;
+		g->data_type		= new_u.data_type;
+		g->stripe		= new_u.stripe;
+		g->stripe_redundancy	= new_u.stripe_redundancy;
+		g->dirty_sectors	= new_u.dirty_sectors;
+		g->cached_sectors	= new_u.cached_sectors;
 
-	g->io_time[READ]	= new_u.read_time;
-	g->io_time[WRITE]	= new_u.write_time;
-	g->oldest_gen		= new_u.oldest_gen;
-	g->gen_valid		= 1;
-	g->stripe		= new_u.stripe;
-	g->stripe_redundancy	= new_u.stripe_redundancy;
+		bucket_unlock(g);
+	}
 
 	percpu_up_read(&c->mark_lock);
 
 	/*
@@ -585,9 +603,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	 */
 
 	if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
-	    old_m.cached_sectors) {
+	    old_u.cached_sectors) {
 		ret = update_cached_sectors(c, new, ca->dev_idx,
-					    -old_m.cached_sectors,
+					    -old_u.cached_sectors,
 					    journal_seq, gc);
 		if (ret) {
 			bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
@@ -595,29 +613,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
 		}
 
 		trace_invalidate(ca, bucket_to_sector(ca, new_u.bucket),
-				 old_m.cached_sectors);
+				 old_u.cached_sectors);
 	}
 
 	return 0;
 }
 
-#define checked_add(a, b)					\
-({								\
-	unsigned _res = (unsigned) (a) + (b);			\
-	bool overflow = _res > U16_MAX;				\
-	if (overflow)						\
-		_res = U16_MAX;					\
-	(a) = _res;						\
-	overflow;						\
-})
-
 void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 			       size_t b, enum bch_data_type data_type,
 			       unsigned sectors, struct gc_pos pos,
 			       unsigned flags)
 {
-	struct bucket *g;
-	struct bucket_mark old, new;
+	struct bucket old, new, *g;
 	bool overflow;
 
 	BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -632,10 +639,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 
 	percpu_down_read(&c->mark_lock);
 	g = gc_bucket(ca, b);
-	old = bucket_cmpxchg(g, new, ({
-		new.data_type	= data_type;
-		overflow = checked_add(new.dirty_sectors, sectors);
-	}));
+
+	bucket_lock(g);
+	old = *g;
+
+	g->data_type = data_type;
+	g->dirty_sectors += sectors;
+	overflow = g->dirty_sectors < sectors;
+
+	new = *g;
+	bucket_unlock(g);
 
 	bch2_fs_inconsistent_on(old.data_type &&
 				old.data_type != data_type, c,
@@ -649,7 +662,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 		bch2_data_types[old.data_type ?: data_type],
 		old.dirty_sectors, sectors);
 
-	bch2_dev_usage_update(c, ca, old, new, 0, true);
+	bch2_dev_usage_update_m(c, ca, old, new, 0, true);
 	percpu_up_read(&c->mark_lock);
 }
 
@@ -669,7 +682,7 @@ static int check_bucket_ref(struct bch_fs *c,
 			    const struct bch_extent_ptr *ptr,
 			    s64 sectors, enum bch_data_type ptr_data_type,
 			    u8 b_gen, u8 bucket_data_type,
-			    u16 dirty_sectors, u16 cached_sectors)
+			    u32 dirty_sectors, u32 cached_sectors)
 {
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 	size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
@@ -737,7 +750,7 @@ static int check_bucket_ref(struct bch_fs *c,
 		goto err;
 	}
 
-	if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
+	if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
 			"while marking %s",
@@ -768,8 +781,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 	s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
 	const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-	struct bucket *g;
-	struct bucket_mark new, old;
+	struct bucket old, new, *g;
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
@@ -781,34 +793,38 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 	buf.atomic++;
 	g = PTR_GC_BUCKET(ca, ptr);
 
-	if (g->mark.dirty_sectors ||
+	if (g->dirty_sectors ||
 	    (g->stripe && g->stripe != k.k->p.offset)) {
 		bch2_fs_inconsistent(c,
 			      "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
-			      ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
+			      ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
 			      (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
 		ret = -EINVAL;
 		goto err;
 	}
 
-	old = bucket_cmpxchg(g, new, ({
-		ret = check_bucket_ref(c, k, ptr, sectors, data_type,
-				       new.gen, new.data_type,
-				       new.dirty_sectors, new.cached_sectors);
-		if (ret)
-			goto err;
+	bucket_lock(g);
+	old = *g;
 
-		new.dirty_sectors += sectors;
-		if (data_type)
-			new.data_type = data_type;
+	ret = check_bucket_ref(c, k, ptr, sectors, data_type,
+			       new.gen, new.data_type,
			       new.dirty_sectors, new.cached_sectors);
+	if (ret) {
+		bucket_unlock(g);
+		goto err;
+	}
 
-		new.stripe = true;
-	}));
+	new.dirty_sectors += sectors;
+	if (data_type)
+		new.data_type = data_type;
 
 	g->stripe = k.k->p.offset;
 	g->stripe_redundancy = s->nr_redundant;
 
-	bch2_dev_usage_update(c, ca, old, new, journal_seq, true);
+	new = *g;
+	bucket_unlock(g);
+
+	bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
 err:
 	percpu_up_read(&c->mark_lock);
 	printbuf_exit(&buf);
@@ -820,9 +836,9 @@ static int __mark_pointer(struct btree_trans *trans,
 			  const struct bch_extent_ptr *ptr,
 			  s64 sectors, enum bch_data_type ptr_data_type,
 			  u8 bucket_gen, u8 *bucket_data_type,
-			  u16 *dirty_sectors, u16 *cached_sectors)
+			  u32 *dirty_sectors, u32 *cached_sectors)
 {
-	u16 *dst_sectors = !ptr->cached
+	u32 *dst_sectors = !ptr->cached
 		? dirty_sectors
 		: cached_sectors;
 	int ret = check_bucket_ref(trans->c, k, ptr, sectors, ptr_data_type,
@@ -846,11 +862,9 @@ static int bch2_mark_pointer(struct btree_trans *trans,
 {
 	u64 journal_seq = trans->journal_res.seq;
 	struct bch_fs *c = trans->c;
-	struct bucket_mark old, new;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-	struct bucket *g;
+	struct bucket old, new, *g;
 	u8 bucket_data_type;
-	u64 v;
 	int ret = 0;
 
 	BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -858,30 +872,27 @@ static int bch2_mark_pointer(struct btree_trans *trans,
 
 	percpu_down_read(&c->mark_lock);
 	g = PTR_GC_BUCKET(ca, &p.ptr);
 
-	v = atomic64_read(&g->_mark.v);
-	do {
-		new.v.counter = old.v.counter = v;
-		bucket_data_type = new.data_type;
-
-		ret = __mark_pointer(trans, k, &p.ptr, sectors,
-				     data_type, new.gen,
-				     &bucket_data_type,
-				     &new.dirty_sectors,
-				     &new.cached_sectors);
-		if (ret)
-			goto err;
+	bucket_lock(g);
+	old = *g;
 
-		new.data_type = bucket_data_type;
+	bucket_data_type = g->data_type;
 
-		if (flags & BTREE_TRIGGER_NOATOMIC) {
-			g->_mark = new;
-			break;
-		}
-	} while ((v = atomic64_cmpxchg(&g->_mark.v,
-			      old.v.counter,
-			      new.v.counter)) != old.v.counter);
+	ret = __mark_pointer(trans, k, &p.ptr, sectors,
+			     data_type, g->gen,
+			     &bucket_data_type,
+			     &g->dirty_sectors,
+			     &g->cached_sectors);
+	if (ret) {
+		bucket_unlock(g);
+		goto err;
+	}
+
+	g->data_type = bucket_data_type;
+
+	new = *g;
+	bucket_unlock(g);
 
-	bch2_dev_usage_update(c, ca, old, new, journal_seq, true);
+	bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
 err:
 	percpu_up_read(&c->mark_lock);
 
@@ -2041,16 +2052,6 @@ recalculate:
 
 /* Startup/shutdown: */
 
-static void buckets_free_rcu(struct rcu_head *rcu)
-{
-	struct bucket_array *buckets =
-		container_of(rcu, struct bucket_array, rcu);
-
-	kvpfree(buckets,
-		sizeof(*buckets) +
-		buckets->nbuckets * sizeof(struct bucket));
-}
-
 static void bucket_gens_free_rcu(struct rcu_head *rcu)
 {
 	struct bucket_gens *buckets =
@@ -2061,46 +2062,19 @@ static void bucket_gens_free_rcu(struct rcu_head *rcu)
 
 int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 {
-	struct bucket_array *buckets = NULL, *old_buckets = NULL;
 	struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
 	unsigned long *buckets_nouse = NULL;
-	alloc_fifo	free[RESERVE_NR];
-	alloc_fifo	free_inc;
-	alloc_heap	alloc_heap;
-
-	size_t btree_reserve	= DIV_ROUND_UP(BTREE_NODE_RESERVE,
-			     ca->mi.bucket_size / btree_sectors(c));
-	/* XXX: these should be tunable */
-	size_t reserve_none	= max_t(size_t, 1, nbuckets >> 9);
-	size_t copygc_reserve	= max_t(size_t, 2, nbuckets >> 6);
-	size_t free_inc_nr	= max(max_t(size_t, 1, nbuckets >> 12),
-				      btree_reserve * 2);
-	bool resize = ca->buckets[0] != NULL;
+	bool resize = ca->bucket_gens != NULL;
 	int ret = -ENOMEM;
-	unsigned i;
 
-	memset(&free,		0, sizeof(free));
-	memset(&free_inc,	0, sizeof(free_inc));
-	memset(&alloc_heap,	0, sizeof(alloc_heap));
-
-	if (!(buckets		= kvpmalloc(sizeof(struct bucket_array) +
-					    nbuckets * sizeof(struct bucket),
-					    GFP_KERNEL|__GFP_ZERO)) ||
-	    !(bucket_gens	= kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
+	if (!(bucket_gens	= kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
 					    GFP_KERNEL|__GFP_ZERO)) ||
 	    (c->opts.buckets_nouse &&
 	     !(buckets_nouse	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
 					    sizeof(unsigned long),
-					    GFP_KERNEL|__GFP_ZERO))) ||
-	    !init_fifo(&free[RESERVE_MOVINGGC],
-		       copygc_reserve, GFP_KERNEL) ||
-	    !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
-	    !init_fifo(&free_inc,	free_inc_nr, GFP_KERNEL) ||
-	    !init_heap(&alloc_heap,	ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL))
+					    GFP_KERNEL|__GFP_ZERO))))
 		goto err;
 
-	buckets->first_bucket	= ca->mi.first_bucket;
-	buckets->nbuckets	= nbuckets;
 	bucket_gens->first_bucket = ca->mi.first_bucket;
 	bucket_gens->nbuckets	= nbuckets;
 
@@ -2112,15 +2086,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		percpu_down_write(&c->mark_lock);
 	}
 
-	old_buckets = bucket_array(ca);
 	old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
 
 	if (resize) {
-		size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
+		size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);
 
-		memcpy(buckets->b,
-		       old_buckets->b,
-		       n * sizeof(struct bucket));
 		memcpy(bucket_gens->b,
 		       old_bucket_gens->b,
 		       n);
@@ -2130,47 +2100,25 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		       BITS_TO_LONGS(n) * sizeof(unsigned long));
 	}
 
-	rcu_assign_pointer(ca->buckets[0], buckets);
 	rcu_assign_pointer(ca->bucket_gens, bucket_gens);
-	buckets		= old_buckets;
 	bucket_gens	= old_bucket_gens;
 
 	swap(ca->buckets_nouse, buckets_nouse);
 
+	nbuckets = ca->mi.nbuckets;
+
 	if (resize) {
 		percpu_up_write(&c->mark_lock);
+		up_write(&ca->bucket_lock);
 		up_write(&c->gc_lock);
 	}
 
-	spin_lock(&c->freelist_lock);
-	for (i = 0; i < RESERVE_NR; i++) {
-		fifo_move(&free[i], &ca->free[i]);
-		swap(ca->free[i], free[i]);
-	}
-	fifo_move(&free_inc, &ca->free_inc);
-	swap(ca->free_inc, free_inc);
-	spin_unlock(&c->freelist_lock);
-
-	/* with gc lock held, alloc_heap can't be in use: */
-	swap(ca->alloc_heap, alloc_heap);
-
-	nbuckets = ca->mi.nbuckets;
-
-	if (resize)
-		up_write(&ca->bucket_lock);
-
 	ret = 0;
 err:
-	free_heap(&alloc_heap);
-	free_fifo(&free_inc);
-	for (i = 0; i < RESERVE_NR; i++)
-		free_fifo(&free[i]);
 	kvpfree(buckets_nouse,
 		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
 	if (bucket_gens)
 		call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
-	if (buckets)
-		call_rcu(&buckets->rcu, buckets_free_rcu);
 
 	return ret;
 }
@@ -2179,17 +2127,10 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
 {
 	unsigned i;
 
-	free_heap(&ca->alloc_heap);
-	free_fifo(&ca->free_inc);
-	for (i = 0; i < RESERVE_NR; i++)
-		free_fifo(&ca->free[i]);
 	kvpfree(ca->buckets_nouse,
 		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
 	kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
 		sizeof(struct bucket_gens) + ca->mi.nbuckets);
-	kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
-		sizeof(struct bucket_array) +
-		ca->mi.nbuckets * sizeof(struct bucket));
 
 	for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
 		free_percpu(ca->usage[i]);