author     Kent Overstreet <kent.overstreet@gmail.com>   2019-03-15 18:20:46 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>   2019-03-17 14:30:14 -0400
commit     e6dd376b219fdccdb4e1fa8c6d248aafef71db2f (patch)
tree       b33adcf31e77382a54bcc3658a8d5b68a0e98811
parent     80dda1af86971aa42e0e1456cab8f0661ec1133e (diff)
bcachefs: allocate fs_usage in do_btree_insert_at()
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--   fs/bcachefs/bcachefs.h                |   5
-rw-r--r--   fs/bcachefs/btree_update_interior.c   |  21
-rw-r--r--   fs/bcachefs/btree_update_leaf.c       |  44
-rw-r--r--   fs/bcachefs/buckets.c                 | 102
-rw-r--r--   fs/bcachefs/buckets.h                 |  14
-rw-r--r--   fs/bcachefs/extents.c                 |   5
-rw-r--r--   fs/bcachefs/replicas.c                |  48
-rw-r--r--   fs/bcachefs/replicas.h                |   2
-rw-r--r--   fs/bcachefs/super.c                   |   4
9 files changed, 174 insertions, 71 deletions
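The heart of the change is in the buckets.c hunks below: the percpu usage_scratch buffer becomes a single preallocated buffer guarded by usage_scratch_lock, handed out by bch2_fs_usage_scratch_get() and returned via bch2_fs_usage_scratch_put() -- the bcachefs.h comment calls it a "single element mempool". As a reading aid, here is a minimal standalone sketch of that allocation pattern; struct scratch_pool and the scratch_* names are hypothetical, the real code operates on struct bch_fs directly:

#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/string.h>

struct scratch_pool {
	struct mutex	lock;	/* guards the one preallocated buffer */
	void		*buf;	/* preallocated at init time */
	size_t		bytes;
};

/* Always returns a zeroed buffer; sleeps only after the nonblocking paths fail. */
static void *scratch_get(struct scratch_pool *p)
{
	void *ret;

	/* fast path: plain allocation that never sleeps */
	ret = kzalloc(p->bytes, GFP_NOWAIT);
	if (ret)
		return ret;

	/* allocator says no: grab the preallocated buffer if it's free */
	if (mutex_trylock(&p->lock))
		goto out_pool;

	/* pooled buffer busy: try allocating again, this time allowed to sleep */
	ret = kzalloc(p->bytes, GFP_NOFS);
	if (ret)
		return ret;

	/* last resort: sleep until the preallocated buffer is released */
	mutex_lock(&p->lock);
out_pool:
	memset(p->buf, 0, p->bytes);
	return p->buf;
}

static void scratch_put(struct scratch_pool *p, void *buf)
{
	if (buf == p->buf)
		mutex_unlock(&p->lock);	/* returning the pooled buffer */
	else
		kfree(buf);		/* returning a heap allocation */
}

The fallback order is the point: GFP_NOWAIT and mutex_trylock() never sleep, and only when both fail does the caller block -- first in the allocator (GFP_NOFS, safe to use under filesystem locks), then as a last resort on the mutex. The get path can therefore never fail outright, which is why the callers in the diff below need no error handling for it.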
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 052ec263618e..b742d72167c6 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -632,7 +632,10 @@ struct bch_fs {
 	struct percpu_rw_semaphore mark_lock;
 
 	struct bch_fs_usage __percpu *usage[2];
-	struct bch_fs_usage __percpu *usage_scratch;
+
+	/* single element mempool: */
+	struct mutex		usage_scratch_lock;
+	struct bch_fs_usage	*usage_scratch;
 
 	/*
 	 * When we invalidate buckets, we use both the priority and the amount
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index b1b858dedaf0..6537ed50d9e5 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1074,8 +1074,8 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 	__bch2_btree_set_root_inmem(c, b);
 
 	mutex_lock(&c->btree_interior_update_lock);
-	percpu_down_read_preempt_disable(&c->mark_lock);
-	fs_usage = bch2_fs_usage_get_scratch(c);
+	percpu_down_read(&c->mark_lock);
+	fs_usage = bch2_fs_usage_scratch_get(c);
 
 	bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
 			     true, 0,
@@ -1088,7 +1088,8 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 			    fs_usage);
 	bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
 
-	percpu_up_read_preempt_enable(&c->mark_lock);
+	bch2_fs_usage_scratch_put(c, fs_usage);
+	percpu_up_read(&c->mark_lock);
 	mutex_unlock(&c->btree_interior_update_lock);
 }
 
@@ -1167,8 +1168,8 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 	BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
 
 	mutex_lock(&c->btree_interior_update_lock);
-	percpu_down_read_preempt_disable(&c->mark_lock);
-	fs_usage = bch2_fs_usage_get_scratch(c);
+	percpu_down_read(&c->mark_lock);
+	fs_usage = bch2_fs_usage_scratch_get(c);
 
 	bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
 			     true, 0,
@@ -1189,7 +1190,8 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 
 	bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
 
-	percpu_up_read_preempt_enable(&c->mark_lock);
+	bch2_fs_usage_scratch_put(c, fs_usage);
+	percpu_up_read(&c->mark_lock);
 	mutex_unlock(&c->btree_interior_update_lock);
 
 	bch2_btree_bset_insert_key(iter, b, node_iter, insert);
@@ -1981,8 +1983,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 		bch2_btree_node_lock_write(b, iter);
 
 	mutex_lock(&c->btree_interior_update_lock);
-	percpu_down_read_preempt_disable(&c->mark_lock);
-	fs_usage = bch2_fs_usage_get_scratch(c);
+	percpu_down_read(&c->mark_lock);
+	fs_usage = bch2_fs_usage_scratch_get(c);
 
 	bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
 			     true, 0,
@@ -1993,7 +1995,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 			    fs_usage);
 	bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
 
-	percpu_up_read_preempt_enable(&c->mark_lock);
+	bch2_fs_usage_scratch_put(c, fs_usage);
+	percpu_up_read(&c->mark_lock);
 	mutex_unlock(&c->btree_interior_update_lock);
 
 	if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index a53ca2e0d4eb..fb16da18e769 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -268,8 +268,6 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
 	int old_live_u64s = b->nr.live_u64s;
 	int live_u64s_added, u64s_added;
 
-	bch2_mark_update(trans, insert);
-
 	if (!btree_node_is_extents(b))
 		bch2_insert_fixup_key(trans, insert);
 	else
@@ -498,11 +496,6 @@ btree_key_can_insert(struct btree_trans *trans,
 	if (unlikely(btree_node_fake(b)))
 		return BTREE_INSERT_BTREE_NODE_FULL;
 
-	if (!bch2_bkey_replicas_marked(c,
-				       bkey_i_to_s_c(insert->k),
-				       true))
-		return BTREE_INSERT_NEED_MARK_REPLICAS;
-
 	ret = !btree_node_is_extents(b)
 		? BTREE_INSERT_OK
 		: bch2_extent_can_insert(trans, insert, u64s);
@@ -554,6 +547,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 				     struct btree_insert_entry **stopped_at)
 {
 	struct bch_fs *c = trans->c;
+	struct bch_fs_usage *fs_usage = NULL;
 	struct btree_insert_entry *i;
 	struct btree_iter *linked;
 	int ret;
@@ -561,12 +555,29 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	trans_for_each_update_iter(trans, i)
 		BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
+	trans_for_each_update_iter(trans, i) {
+		if (i->deferred ||
+		    !btree_node_type_needs_gc(i->iter->btree_id))
+			continue;
+
+		if (!fs_usage) {
+			percpu_down_read(&c->mark_lock);
+			fs_usage = bch2_fs_usage_scratch_get(c);
+		}
+
+		if (!bch2_bkey_replicas_marked_locked(c,
+			bkey_i_to_s_c(i->k), true)) {
+			ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+			goto out;
+		}
+	}
+
 	btree_trans_lock_write(c, trans);
 
 	if (race_fault()) {
 		ret = -EINTR;
 		trans_restart(" (race)");
-		goto out;
+		goto out_unlock;
 	}
 
 	/*
@@ -576,7 +587,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	 */
 	ret = btree_trans_check_can_insert(trans, stopped_at);
 	if (ret)
-		goto out;
+		goto out_unlock;
 
 	/*
 	 * Don't get journal reservation until after we know insert will
@@ -584,7 +595,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	 */
 	ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
 	if (ret)
-		goto out;
+		goto out_unlock;
 
 	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
 		if (journal_seq_verify(c))
@@ -609,14 +620,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 		}
 	}
 
+	trans_for_each_update_iter(trans, i)
+		bch2_mark_update(trans, i, fs_usage);
+	if (fs_usage)
+		bch2_trans_fs_usage_apply(trans, fs_usage);
+
 	trans_for_each_update(trans, i)
 		do_btree_insert_one(trans, i);
-out:
+out_unlock:
 	BUG_ON(ret &&
 	       (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
 	       trans->journal_res.ref);
 
 	btree_trans_unlock_write(trans);
+out:
+	if (fs_usage) {
+		bch2_fs_usage_scratch_put(c, fs_usage);
+		percpu_up_read(&c->mark_lock);
+	}
+
 	bch2_journal_res_put(&c->journal, &trans->journal_res);
 
 	return ret;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index f827ff121d02..dae718dcb507 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -143,6 +143,37 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 	percpu_up_write(&c->mark_lock);
 }
 
+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
+{
+	if (fs_usage == c->usage_scratch)
+		mutex_unlock(&c->usage_scratch_lock);
+	else
+		kfree(fs_usage);
+}
+
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
+{
+	struct bch_fs_usage *ret;
+	unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
+
+	ret = kzalloc(bytes, GFP_NOWAIT);
+	if (ret)
+		return ret;
+
+	if (mutex_trylock(&c->usage_scratch_lock))
+		goto out_pool;
+
+	ret = kzalloc(bytes, GFP_NOFS);
+	if (ret)
+		return ret;
+
+	mutex_lock(&c->usage_scratch_lock);
+out_pool:
+	ret = c->usage_scratch;
+	memset(ret, 0, bytes);
+	return ret;
+}
+
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bch_dev_usage ret;
@@ -290,8 +321,10 @@ int bch2_fs_usage_apply(struct bch_fs *c,
 		fs_usage->online_reserved -= added;
 	}
 
+	preempt_disable();
 	acc_u64s((u64 *) this_cpu_ptr(c->usage[0]),
 		 (u64 *) fs_usage,
 		 fs_usage_u64s(c));
+	preempt_enable();
 
 	return ret;
 }
@@ -897,31 +930,39 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 			   unsigned journal_seq, unsigned flags,
 			   bool gc)
 {
+	int ret = 0;
+
+	preempt_disable();
+
 	if (!fs_usage || gc)
 		fs_usage = this_cpu_ptr(c->usage[gc]);
 
 	switch (k.k->type) {
 	case KEY_TYPE_alloc:
-		return bch2_mark_alloc(c, k, inserting,
+		ret = bch2_mark_alloc(c, k, inserting,
 				fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_btree_ptr:
-		return bch2_mark_extent(c, k, inserting
+		ret = bch2_mark_extent(c, k, inserting
 				?  c->opts.btree_node_size
 				: -c->opts.btree_node_size,
 				BCH_DATA_BTREE,
 				fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_extent:
-		return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+		ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
 				fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_stripe:
-		return bch2_mark_stripe(c, k, inserting,
+		ret = bch2_mark_stripe(c, k, inserting,
 				fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_inode:
 		if (inserting)
 			fs_usage->nr_inodes++;
 		else
 			fs_usage->nr_inodes--;
-		return 0;
+		break;
 	case KEY_TYPE_reservation: {
 		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -931,11 +972,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		fs_usage->reserved += sectors;
 		fs_usage->persistent_reserved[replicas - 1] += sectors;
-		return 0;
+		break;
 	}
-	default:
-		return 0;
 	}
+
+	preempt_enable();
+
+	return ret;
 }
 
 int bch2_mark_key_locked(struct bch_fs *c,
@@ -967,24 +1010,19 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 }
 
 void bch2_mark_update(struct btree_trans *trans,
-		      struct btree_insert_entry *insert)
+		      struct btree_insert_entry *insert,
+		      struct bch_fs_usage *fs_usage)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter *iter = insert->iter;
 	struct btree *b = iter->l[0].b;
 	struct btree_node_iter node_iter = iter->l[0].iter;
-	struct bch_fs_usage *fs_usage;
 	struct gc_pos pos = gc_pos_btree_node(b);
 	struct bkey_packed *_k;
-	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
-	static int warned_disk_usage = 0;
 
 	if (!btree_node_type_needs_gc(iter->btree_id))
 		return;
 
-	percpu_down_read_preempt_disable(&c->mark_lock);
-	fs_usage = bch2_fs_usage_get_scratch(c);
-
 	if (!(trans->flags & BTREE_INSERT_NOMARK))
 		bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
 			bpos_min(insert->k->k.p, b->key.k.p).offset -
@@ -1037,16 +1075,32 @@ void bch2_mark_update(struct btree_trans *trans,
 
 		bch2_btree_node_iter_advance(&node_iter, b);
 	}
+}
 
-	if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) &&
-	    !warned_disk_usage &&
-	    !xchg(&warned_disk_usage, 1)) {
-		char buf[200];
+void bch2_trans_fs_usage_apply(struct btree_trans *trans,
+			       struct bch_fs_usage *fs_usage)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_insert_entry *i;
+	static int warned_disk_usage = 0;
+	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
+	char buf[200];
+
+	if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
+	    warned_disk_usage ||
+	    xchg(&warned_disk_usage, 1))
+		return;
+
+	pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
 
-		pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+	trans_for_each_update_iter(trans, i) {
+		struct btree_iter *iter = i->iter;
+		struct btree *b = iter->l[0].b;
+		struct btree_node_iter node_iter = iter->l[0].iter;
+		struct bkey_packed *_k;
 
 		pr_err("while inserting");
-		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k));
+		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
 		pr_err("%s", buf);
 		pr_err("overlapping with");
@@ -1059,8 +1113,8 @@ void bch2_mark_update(struct btree_trans *trans,
 			k = bkey_disassemble(b, _k, &unpacked);
 
 			if (btree_node_is_extents(b)
-			    ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
-			    : bkey_cmp(insert->k->k.p, k.k->p))
+			    ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
+			    : bkey_cmp(i->k->k.p, k.k->p))
 				break;
 
 			bch2_bkey_val_to_text(&PBUF(buf), c, k);
@@ -1069,8 +1123,6 @@ void bch2_mark_update(struct btree_trans *trans,
 			bch2_btree_node_iter_advance(&node_iter, b);
 		}
 	}
-
-	percpu_up_read_preempt_enable(&c->mark_lock);
 }
 
 /* Disk reservations: */
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index e57bf2b33000..c9706fa05824 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -218,13 +218,8 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c)
 		READ_ONCE(c->replicas.nr);
 }
 
-static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
-{
-	struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch);
-
-	memset(ret, 0, fs_usage_u64s(c) * sizeof(u64));
-	return ret;
-}
+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
 
 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
 
@@ -255,10 +250,13 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
 int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
 		  bool, s64, struct gc_pos,
 		  struct bch_fs_usage *, u64, unsigned);
-void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *);
 int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
 			struct disk_reservation *);
 
+void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
+		      struct bch_fs_usage *);
+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
+
 /* disk reservations: */
 
 void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 8a7906aa055b..1ab951c97a89 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -1190,11 +1190,12 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
 
 		if (s.deleting)
 			tmp.k.k.type = KEY_TYPE_discard;
-
+#if 0
+		/* disabled due to lock recursion - mark_lock: */
 		if (debug_check_bkeys(c))
 			bch2_bkey_debugcheck(c, iter->l[0].b,
 					     bkey_i_to_s_c(&tmp.k));
-
+#endif
 		EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
 
 		extent_bset_insert(c, iter, &tmp.k);
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 99283b1047ff..d0076bd4fc4e 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -206,22 +206,29 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
 	return __replicas_entry_idx(r, search) >= 0;
 }
 
-bool bch2_replicas_marked(struct bch_fs *c,
+static bool bch2_replicas_marked_locked(struct bch_fs *c,
 			  struct bch_replicas_entry *search,
 			  bool check_gc_replicas)
 {
-	bool marked;
-
 	if (!search->nr_devs)
 		return true;
 
 	verify_replicas_entry_sorted(search);
 
-	percpu_down_read_preempt_disable(&c->mark_lock);
-	marked = __replicas_has_entry(&c->replicas, search) &&
+	return __replicas_has_entry(&c->replicas, search) &&
 		(!check_gc_replicas ||
 		 likely((!c->replicas_gc.entries)) ||
 		 __replicas_has_entry(&c->replicas_gc, search));
+}
+
+bool bch2_replicas_marked(struct bch_fs *c,
+			  struct bch_replicas_entry *search,
+			  bool check_gc_replicas)
+{
+	bool marked;
+
+	percpu_down_read_preempt_disable(&c->mark_lock);
+	marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
 	percpu_up_read_preempt_enable(&c->mark_lock);
 
 	return marked;
@@ -262,7 +269,7 @@ static int replicas_table_update(struct bch_fs *c,
 				 struct bch_replicas_cpu *new_r)
 {
 	struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
-	struct bch_fs_usage __percpu *new_scratch = NULL;
+	struct bch_fs_usage *new_scratch = NULL;
 	unsigned bytes = sizeof(struct bch_fs_usage) +
 		sizeof(u64) * new_r->nr;
 	int ret = -ENOMEM;
@@ -272,8 +279,7 @@ static int replicas_table_update(struct bch_fs *c,
 	    (c->usage[1] &&
 	     !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
 						 GFP_NOIO))) ||
-	    !(new_scratch = __alloc_percpu_gfp(bytes, sizeof(u64),
-					       GFP_NOIO)))
+	    !(new_scratch = kmalloc(bytes, GFP_NOIO)))
 		goto err;
 
 	if (c->usage[0])
@@ -289,7 +295,7 @@ static int replicas_table_update(struct bch_fs *c,
 	swap(c->replicas, *new_r);
 	ret = 0;
 err:
-	free_percpu(new_scratch);
+	kfree(new_scratch);
 	free_percpu(new_usage[1]);
 	free_percpu(new_usage[0]);
 	return ret;
@@ -389,9 +395,9 @@ int bch2_mark_replicas(struct bch_fs *c,
 		: bch2_mark_replicas_slowpath(c, r);
 }
 
-bool bch2_bkey_replicas_marked(struct bch_fs *c,
-			       struct bkey_s_c k,
-			       bool check_gc_replicas)
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
+				      struct bkey_s_c k,
+				      bool check_gc_replicas)
 {
 	struct bch_replicas_padded search;
 	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
@@ -400,13 +406,27 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
 	for (i = 0; i < cached.nr; i++) {
 		bch2_replicas_entry_cached(&search.e, cached.devs[i]);
 
-		if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
+		if (!bch2_replicas_marked_locked(c, &search.e,
+						 check_gc_replicas))
 			return false;
 	}
 
 	bkey_to_replicas(&search.e, k);
 
-	return bch2_replicas_marked(c, &search.e, check_gc_replicas);
+	return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
+}
+
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+			       struct bkey_s_c k,
+			       bool check_gc_replicas)
+{
+	bool marked;
+
+	percpu_down_read_preempt_disable(&c->mark_lock);
+	marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
+	percpu_up_read_preempt_enable(&c->mark_lock);
+
+	return marked;
 }
 
 int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index 1607b7bdfd50..ad97e3bc6b93 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -25,6 +25,8 @@ bool bch2_replicas_marked(struct bch_fs *,
 int bch2_mark_replicas(struct bch_fs *,
 		       struct bch_replicas_entry *);
 
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
+				      struct bkey_s_c, bool);
 bool bch2_bkey_replicas_marked(struct bch_fs *,
 			       struct bkey_s_c, bool);
 int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 1b3891726639..bbf5b9725407 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -435,7 +435,7 @@ static void bch2_fs_free(struct bch_fs *c)
 	bch2_io_clock_exit(&c->io_clock[READ]);
 	bch2_fs_compress_exit(c);
 	percpu_free_rwsem(&c->mark_lock);
-	free_percpu(c->usage_scratch);
+	kfree(c->usage_scratch);
 	free_percpu(c->usage[0]);
 	free_percpu(c->pcpu);
 	mempool_exit(&c->btree_iters_pool);
@@ -604,6 +604,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	mutex_init(&c->btree_reserve_cache_lock);
 	mutex_init(&c->btree_interior_update_lock);
 
+	mutex_init(&c->usage_scratch_lock);
+
 	mutex_init(&c->bio_bounce_pages_lock);
 	bio_list_init(&c->btree_write_error_list);
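For reference, every call site in the interior-update paths above is left with the same bracket pattern around mark_lock -- note that preemption now stays enabled (plain percpu_down_read() instead of percpu_down_read_preempt_disable()), which is what allows bch2_fs_usage_scratch_get() to block. A condensed view of that shape, with the middle elided (it is the bch2_mark_key_locked() and bch2_fs_usage_apply() calls shown in the hunks):

	percpu_down_read(&c->mark_lock);
	fs_usage = bch2_fs_usage_scratch_get(c);

	/* ... bch2_mark_key_locked() / bch2_fs_usage_apply() ... */

	bch2_fs_usage_scratch_put(c, fs_usage);
	percpu_up_read(&c->mark_lock);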