diff options
Diffstat (limited to 'fs/bcachefs/alloc_foreground.c')
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 407 |
1 files changed, 210 insertions, 197 deletions
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 1014ebe63323..fd1415524e46 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -106,20 +106,20 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) return; } - spin_lock(&ob->lock); - ob->valid = false; - ob->data_type = 0; - spin_unlock(&ob->lock); + scoped_guard(spinlock, &ob->lock) { + ob->valid = false; + ob->data_type = 0; + } - spin_lock(&c->freelist_lock); - bch2_open_bucket_hash_remove(c, ob); + scoped_guard(spinlock, &c->freelist_lock) { + bch2_open_bucket_hash_remove(c, ob); - ob->freelist = c->open_buckets_freelist; - c->open_buckets_freelist = ob - c->open_buckets; + ob->freelist = c->open_buckets_freelist; + c->open_buckets_freelist = ob - c->open_buckets; - c->open_buckets_nr_free++; - ca->nr_open_buckets--; - spin_unlock(&c->freelist_lock); + c->open_buckets_nr_free++; + ca->nr_open_buckets--; + } closure_wake_up(&c->open_buckets_wait); } @@ -164,14 +164,14 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) BUG_ON(c->open_buckets_partial_nr >= ARRAY_SIZE(c->open_buckets_partial)); - spin_lock(&c->freelist_lock); - scoped_guard(rcu) + scoped_guard(spinlock, &c->freelist_lock) { + guard(rcu)(); bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++; - ob->on_partial_list = true; - c->open_buckets_partial[c->open_buckets_partial_nr++] = - ob - c->open_buckets; - spin_unlock(&c->freelist_lock); + ob->on_partial_list = true; + c->open_buckets_partial[c->open_buckets_partial_nr++] = + ob - c->open_buckets; + } closure_wake_up(&c->open_buckets_wait); closure_wake_up(&c->freelist_wait); @@ -219,33 +219,31 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, return NULL; } - spin_lock(&c->freelist_lock); + guard(spinlock)(&c->freelist_lock); if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) { if (cl) closure_wait(&c->open_buckets_wait, cl); track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true); - spin_unlock(&c->freelist_lock); return ERR_PTR(bch_err_throw(c, open_buckets_empty)); } /* Recheck under lock: */ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { - spin_unlock(&c->freelist_lock); req->counters.skipped_open++; return NULL; } struct open_bucket *ob = bch2_open_bucket_alloc(c); - spin_lock(&ob->lock); - ob->valid = true; - ob->sectors_free = ca->mi.bucket_size; - ob->dev = ca->dev_idx; - ob->gen = gen; - ob->bucket = bucket; - spin_unlock(&ob->lock); + scoped_guard(spinlock, &ob->lock) { + ob->valid = true; + ob->sectors_free = ca->mi.bucket_size; + ob->dev = ca->dev_idx; + ob->gen = gen; + ob->bucket = bucket; + } ca->nr_open_buckets++; bch2_open_bucket_hash_add(c, ob); @@ -253,7 +251,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false); track_event_change(&c->times[BCH_TIME_blocked_allocate], false); - spin_unlock(&c->freelist_lock); return ob; } @@ -453,7 +450,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct closure *cl, struct open_bucket *ob) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); printbuf_tabstop_push(&buf, 24); @@ -480,8 +477,6 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, prt_printf(&buf, "err\t%s\n", bch2_err_str(PTR_ERR(ob))); trace_bucket_alloc_fail(c, buf.buf); } - - printbuf_exit(&buf); } /** @@ -511,7 +506,8 @@ again: bch2_dev_usage_read_fast(ca, &req->usage); avail = dev_buckets_free(ca, req->usage, req->watermark); - if (req->usage.buckets[BCH_DATA_need_discard] > avail) + if (req->usage.buckets[BCH_DATA_need_discard] > + min(avail, ca->mi.nbuckets >> 7)) bch2_dev_do_discards(ca); if (req->usage.buckets[BCH_DATA_need_gc_gens] > avail) @@ -588,7 +584,8 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, .ca = ca, }; - bch2_trans_do(c, + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false))); return ob; } @@ -706,19 +703,13 @@ static int add_new_bucket(struct bch_fs *c, inline int bch2_bucket_alloc_set_trans(struct btree_trans *trans, struct alloc_request *req, struct dev_stripe_state *stripe, - struct closure *_cl) + struct closure *cl) { struct bch_fs *c = trans->c; - struct closure *cl = NULL; int ret = 0; BUG_ON(req->nr_effective >= req->nr_replicas); - /* - * Try nonblocking first, so that if one device is full we'll try from - * other devices: - */ -retry_blocking: bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc, &req->devs_sorted); darray_for_each(req->devs_sorted, i) { @@ -744,18 +735,16 @@ retry_blocking: continue; } - if (add_new_bucket(c, req, ob)) - return 0; - } - - if (ret && - !bch2_err_matches(ret, BCH_ERR_transaction_restart) && - cl != _cl) { - cl = _cl; - goto retry_blocking; + ret = add_new_bucket(c, req, ob); + if (ret) + break; } - return ret ?: bch_err_throw(c, insufficient_devices); + if (ret == 1) + return 0; + if (ret) + return ret; + return bch_err_throw(c, insufficient_devices); } /* Allocate from stripes: */ @@ -773,6 +762,12 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, struct bch_fs *c = trans->c; int ret = 0; + if (req->nr_replicas < 2) + return 0; + + if (ec_open_bucket(c, &req->ptrs)) + return 0; + struct ec_stripe_head *h = bch2_ec_stripe_head_get(trans, req, 0, cl); if (IS_ERR(h)) @@ -849,17 +844,15 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c, static int bucket_alloc_set_partial(struct bch_fs *c, struct alloc_request *req) { - int i, ret = 0; - if (!c->open_buckets_partial_nr) return 0; - spin_lock(&c->freelist_lock); + guard(spinlock)(&c->freelist_lock); if (!c->open_buckets_partial_nr) - goto unlock; + return 0; - for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) { + for (int i = c->open_buckets_partial_nr - 1; i >= 0; --i) { struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i]; if (want_bucket(c, req, ob)) { @@ -879,16 +872,88 @@ static int bucket_alloc_set_partial(struct bch_fs *c, scoped_guard(rcu) bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; - ret = add_new_bucket(c, req, ob); + int ret = add_new_bucket(c, req, ob); if (ret) - break; + return ret; } } -unlock: - spin_unlock(&c->freelist_lock); + + return 0; +} + +static int __open_bucket_add_buckets(struct btree_trans *trans, + struct alloc_request *req, + struct closure *_cl) +{ + struct bch_fs *c = trans->c; + struct open_bucket *ob; + struct closure *cl = NULL; + unsigned i; + int ret; + + req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target); + + /* Don't allocate from devices we already have pointers to: */ + darray_for_each(*req->devs_have, i) + __clear_bit(*i, req->devs_may_alloc.d); + + open_bucket_for_each(c, &req->ptrs, ob, i) + __clear_bit(ob->dev, req->devs_may_alloc.d); + + ret = bucket_alloc_set_writepoint(c, req); + if (ret) + return ret; + + ret = bucket_alloc_set_partial(c, req); + if (ret) + return ret; + + if (req->ec) { + ret = bucket_alloc_from_stripe(trans, req, _cl); + } else { +retry_blocking: + /* + * Try nonblocking first, so that if one device is full we'll try from + * other devices: + */ + ret = bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe, cl); + if (ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart) && + !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && + !cl && _cl) { + cl = _cl; + goto retry_blocking; + } + } + return ret; } +static int open_bucket_add_buckets(struct btree_trans *trans, + struct alloc_request *req, + struct closure *cl) +{ + int ret; + + if (req->ec && !ec_open_bucket(trans->c, &req->ptrs)) { + ret = __open_bucket_add_buckets(trans, req, cl); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, BCH_ERR_operation_blocked) || + bch2_err_matches(ret, BCH_ERR_freelist_empty) || + bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) + return ret; + if (req->nr_effective >= req->nr_replicas) + return 0; + } + + bool ec = false; + swap(ec, req->ec); + ret = __open_bucket_add_buckets(trans, req, cl); + swap(ec, req->ec); + + return ret < 0 ? ret : 0; +} + /** * should_drop_bucket - check if this is open_bucket should go away * @ob: open_bucket to predicate on @@ -909,23 +974,18 @@ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c, return ob->ec != NULL; } else if (ca) { bool drop = ob->dev == ca->dev_idx; - struct open_bucket *ob2; - unsigned i; if (!drop && ob->ec) { - unsigned nr_blocks; - - mutex_lock(&ob->ec->lock); - nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; + guard(mutex)(&ob->ec->lock); + unsigned nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; - for (i = 0; i < nr_blocks; i++) { + for (unsigned i = 0; i < nr_blocks; i++) { if (!ob->ec->blocks[i]) continue; - ob2 = c->open_buckets + ob->ec->blocks[i]; + struct open_bucket *ob2 = c->open_buckets + ob->ec->blocks[i]; drop |= ob2->dev == ca->dev_idx; } - mutex_unlock(&ob->ec->lock); } return drop; @@ -941,14 +1001,13 @@ static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, struct open_bucket *ob; unsigned i; - mutex_lock(&wp->lock); + guard(mutex)(&wp->lock); open_bucket_for_each(c, &wp->ptrs, ob, i) if (should_drop_bucket(ob, c, ca, ec)) bch2_open_bucket_put(c, ob); else ob_push(c, &ptrs, ob); wp->ptrs = ptrs; - mutex_unlock(&wp->lock); } void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, @@ -964,39 +1023,37 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point); bch2_writepoint_stop(c, ca, ec, &c->btree_write_point); - mutex_lock(&c->btree_reserve_cache_lock); - while (c->btree_reserve_cache_nr) { - struct btree_alloc *a = - &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; + scoped_guard(mutex, &c->btree_reserve_cache_lock) + while (c->btree_reserve_cache_nr) { + struct btree_alloc *a = + &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - bch2_open_buckets_put(c, &a->ob); - } - mutex_unlock(&c->btree_reserve_cache_lock); + bch2_open_buckets_put(c, &a->ob); + } - spin_lock(&c->freelist_lock); i = 0; - while (i < c->open_buckets_partial_nr) { - struct open_bucket *ob = - c->open_buckets + c->open_buckets_partial[i]; - - if (should_drop_bucket(ob, c, ca, ec)) { - --c->open_buckets_partial_nr; - swap(c->open_buckets_partial[i], - c->open_buckets_partial[c->open_buckets_partial_nr]); - - ob->on_partial_list = false; - - scoped_guard(rcu) - bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; - - spin_unlock(&c->freelist_lock); - bch2_open_bucket_put(c, ob); - spin_lock(&c->freelist_lock); - } else { - i++; + scoped_guard(spinlock, &c->freelist_lock) + while (i < c->open_buckets_partial_nr) { + struct open_bucket *ob = + c->open_buckets + c->open_buckets_partial[i]; + + if (should_drop_bucket(ob, c, ca, ec)) { + --c->open_buckets_partial_nr; + swap(c->open_buckets_partial[i], + c->open_buckets_partial[c->open_buckets_partial_nr]); + + ob->on_partial_list = false; + + scoped_guard(rcu) + bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; + + spin_unlock(&c->freelist_lock); + bch2_open_bucket_put(c, ob); + spin_lock(&c->freelist_lock); + } else { + i++; + } } - } - spin_unlock(&c->freelist_lock); bch2_ec_stop_dev(c, ca); } @@ -1050,22 +1107,17 @@ static noinline bool try_decrease_writepoints(struct btree_trans *trans, unsigne struct open_bucket *ob; unsigned i; - mutex_lock(&c->write_points_hash_lock); - if (c->write_points_nr < old_nr) { - mutex_unlock(&c->write_points_hash_lock); - return true; - } + scoped_guard(mutex, &c->write_points_hash_lock) { + if (c->write_points_nr < old_nr) + return true; - if (c->write_points_nr == 1 || - !too_many_writepoints(c, 8)) { - mutex_unlock(&c->write_points_hash_lock); - return false; - } - - wp = c->write_points + --c->write_points_nr; + if (c->write_points_nr == 1 || + !too_many_writepoints(c, 8)) + return false; - hlist_del_rcu(&wp->node); - mutex_unlock(&c->write_points_hash_lock); + wp = c->write_points + --c->write_points_nr; + hlist_del_rcu(&wp->node); + } bch2_trans_mutex_lock_norelock(trans, &wp->lock); open_bucket_for_each(c, &wp->ptrs, ob, i) @@ -1168,7 +1220,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, unsigned nr_replicas_required, enum bch_watermark watermark, enum bch_write_flags flags, - struct closure *_cl, + struct closure *cl, struct write_point **wp_ret) { struct bch_fs *c = trans->c; @@ -1184,18 +1236,15 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING)) erasure_code = false; - if (nr_replicas < 2) - erasure_code = false; - req->nr_replicas = nr_replicas; req->target = target; + req->ec = erasure_code; req->watermark = watermark; req->flags = flags; req->devs_have = devs_have; BUG_ON(!nr_replicas || !nr_replicas_required); retry: - req->ec = erasure_code; req->ptrs.nr = 0; req->nr_effective = 0; req->have_cache = false; @@ -1205,77 +1254,54 @@ retry: req->data_type = req->wp->data_type; - /* metadata may not allocate on cache devices: */ - if (req->data_type != BCH_DATA_user) - req->have_cache = true; - - /* If we're going to fall back to the whole fs, try nonblocking first */ - struct closure *cl = req->target && !(flags & BCH_WRITE_only_specified_devs) - ? _cl - : NULL; - ret = bch2_trans_relock(trans); if (ret) goto err; - while (1) { - req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target); - - /* Don't allocate from devices we already have pointers to: */ - darray_for_each(*req->devs_have, i) - __clear_bit(*i, req->devs_may_alloc.d); - - open_bucket_for_each(c, &req->ptrs, ob, i) - __clear_bit(ob->dev, req->devs_may_alloc.d); - - ret = bucket_alloc_set_writepoint(c, req) ?: - bucket_alloc_set_partial(c, req) ?: - (req->ec - ? bucket_alloc_from_stripe(trans, req, _cl) - : bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe, cl)); + /* metadata may not allocate on cache devices: */ + if (req->data_type != BCH_DATA_user) + req->have_cache = true; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto err; + if (target && !(flags & BCH_WRITE_only_specified_devs)) { + ret = open_bucket_add_buckets(trans, req, NULL); + if (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto alloc_done; /* Don't retry from all devices if we're out of open buckets: */ - if (ret == -BCH_ERR_open_buckets_empty) - goto retry_blocking; - - if (ret == -BCH_ERR_freelist_empty) { - if (req->target && !(flags & BCH_WRITE_only_specified_devs)) - goto retry_all; - goto retry_blocking; - } - - if (ret == -BCH_ERR_insufficient_devices && req->target) - goto retry_all; - - if (req->nr_effective < req->nr_replicas && req->ec) { - req->ec = false; - continue; - } - - if (ret == -BCH_ERR_insufficient_devices) { - if (req->nr_effective < nr_replicas_required) - goto err; - ret = 0; + if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) { + int ret2 = open_bucket_add_buckets(trans, req, cl); + if (!ret2 || + bch2_err_matches(ret2, BCH_ERR_transaction_restart) || + bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) { + ret = ret2; + goto alloc_done; + } } - BUG_ON(ret < 0); - break; -retry_blocking: - if (cl == _cl) - goto err; - cl = _cl; - continue; -retry_all: /* * Only try to allocate cache (durability = 0 devices) from the * specified target: */ req->have_cache = true; req->target = 0; + + ret = open_bucket_add_buckets(trans, req, cl); + } else { + ret = open_bucket_add_buckets(trans, req, cl); } +alloc_done: + BUG_ON(!ret && req->nr_effective < req->nr_replicas); + + if (erasure_code && !ec_open_bucket(c, &req->ptrs)) + pr_debug("failed to get ec bucket: ret %u", ret); + + if (ret == -BCH_ERR_insufficient_devices && + req->nr_effective >= nr_replicas_required) + ret = 0; + + if (ret) + goto err; if (req->nr_effective > req->nr_replicas) deallocate_extra_replicas(c, req); @@ -1425,35 +1451,25 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca) { - struct open_bucket *ob; + guard(printbuf_atomic)(out); - out->atomic++; - - for (ob = c->open_buckets; + for (struct open_bucket *ob = c->open_buckets; ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { - spin_lock(&ob->lock); + guard(spinlock)(&ob->lock); if (ob->valid && (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); - spin_unlock(&ob->lock); } - - --out->atomic; } void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c) { - unsigned i; - - out->atomic++; - spin_lock(&c->freelist_lock); + guard(printbuf_atomic)(out); + guard(spinlock)(&c->freelist_lock); - for (i = 0; i < c->open_buckets_partial_nr; i++) + for (unsigned i = 0; i < c->open_buckets_partial_nr; i++) bch2_open_bucket_to_text(out, c, c->open_buckets + c->open_buckets_partial[i]); - - spin_unlock(&c->freelist_lock); - --out->atomic; } static const char * const bch2_write_point_states[] = { @@ -1469,7 +1485,7 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob; unsigned i; - mutex_lock(&wp->lock); + guard(mutex)(&wp->lock); prt_printf(out, "%lu: ", wp->write_point); prt_human_readable_u64(out, wp->sectors_allocated << 9); @@ -1488,8 +1504,6 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, open_bucket_for_each(c, &wp->ptrs, ob, i) bch2_open_bucket_to_text(out, c, ob); printbuf_indent_sub(out, 2); - - mutex_unlock(&wp->lock); } void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c) @@ -1576,7 +1590,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", c->opts.allocator_stuck_timeout); @@ -1589,8 +1603,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) bch2_printbuf_make_room(&buf, 4096); - buf.atomic++; - scoped_guard(rcu) + scoped_guard(rcu) { + guard(printbuf_atomic)(&buf); for_each_online_member_rcu(c, ca) { prt_printf(&buf, "Dev %u:\n", ca->dev_idx); printbuf_indent_add(&buf, 2); @@ -1598,7 +1612,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) printbuf_indent_sub(&buf, 2); prt_newline(&buf); } - --buf.atomic; + } prt_printf(&buf, "Copygc debug:\n"); printbuf_indent_add(&buf, 2); @@ -1612,7 +1626,6 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) printbuf_indent_sub(&buf, 2); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } static inline unsigned allocator_wait_timeout(struct bch_fs *c) |