diff options
Diffstat (limited to 'libbcachefs/disk_accounting.c')
-rw-r--r-- | libbcachefs/disk_accounting.c | 226 |
1 files changed, 125 insertions, 101 deletions
diff --git a/libbcachefs/disk_accounting.c b/libbcachefs/disk_accounting.c index b9296195..bd7a0c77 100644 --- a/libbcachefs/disk_accounting.c +++ b/libbcachefs/disk_accounting.c @@ -218,60 +218,93 @@ int bch2_accounting_update_sb(struct btree_trans *trans) return 0; } -static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) +static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a) { - struct bch_replicas_padded r; - - if (accounting_to_replicas(&r.e, a.k->p) && - !bch2_replicas_marked_locked(c, &r.e)) - return -BCH_ERR_btree_insert_need_mark_replicas; + struct bch_accounting_mem *acc = &c->accounting; - struct bch_accounting_mem *acc = &c->accounting[gc]; - unsigned new_nr_counters = acc->nr_counters + bch2_accounting_counters(a.k); - - u64 __percpu *new_counters = __alloc_percpu_gfp(new_nr_counters * sizeof(u64), - sizeof(u64), GFP_KERNEL); - if (!new_counters) - return -BCH_ERR_ENOMEM_disk_accounting; - - preempt_disable(); - memcpy(this_cpu_ptr(new_counters), - bch2_acc_percpu_u64s(acc->v, acc->nr_counters), - acc->nr_counters * sizeof(u64)); - preempt_enable(); + /* raced with another insert, already present: */ + if (eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &a.k->p) < acc->k.nr) + return 0; - struct accounting_pos_offset n = { + struct accounting_mem_entry n = { .pos = a.k->p, .version = a.k->version, - .offset = acc->nr_counters, .nr_counters = bch2_accounting_counters(a.k), + .v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64), + sizeof(u64), GFP_KERNEL), }; - if (darray_push(&acc->k, n)) { - free_percpu(new_counters); - return -BCH_ERR_ENOMEM_disk_accounting; - } - eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, NULL); + if (!n.v[0]) + goto err; + + if (acc->gc_running) { + n.v[1] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64), + sizeof(u64), GFP_KERNEL); + if (!n.v[1]) + goto err; + } - free_percpu(acc->v); - acc->v = new_counters; - acc->nr_counters = new_nr_counters; + if (darray_push(&acc->k, n)) + goto err; - for (unsigned i = 0; i < n.nr_counters; i++) - this_cpu_add(acc->v[n.offset + i], a.v->d[i]); + eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, NULL); return 0; +err: + free_percpu(n.v[1]); + free_percpu(n.v[0]); + return -BCH_ERR_ENOMEM_disk_accounting; } -int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) +int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) { + struct bch_replicas_padded r; + + if (accounting_to_replicas(&r.e, a.k->p) && + !bch2_replicas_marked_locked(c, &r.e)) + return -BCH_ERR_btree_insert_need_mark_replicas; + percpu_up_read(&c->mark_lock); percpu_down_write(&c->mark_lock); - int ret = __bch2_accounting_mem_mod_slowpath(c, a, gc); + int ret = __bch2_accounting_mem_insert(c, a); percpu_up_write(&c->mark_lock); percpu_down_read(&c->mark_lock); return ret; } +static bool accounting_mem_entry_is_zero(struct accounting_mem_entry *e) +{ + for (unsigned i = 0; i < e->nr_counters; i++) + if (percpu_u64_get(e->v[0] + i) || + (e->v[1] && + percpu_u64_get(e->v[1] + i))) + return false; + return true; +} + +void bch2_accounting_mem_gc(struct bch_fs *c) +{ + struct bch_accounting_mem *acc = &c->accounting; + + percpu_down_write(&c->mark_lock); + struct accounting_mem_entry *dst = acc->k.data; + + darray_for_each(acc->k, src) { + if (accounting_mem_entry_is_zero(src)) { + free_percpu(src->v[0]); + free_percpu(src->v[1]); + } else { + *dst++ = *src; + } + } + + acc->k.nr = dst - acc->k.data; + eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, NULL); + percpu_up_write(&c->mark_lock); +} + /* * Read out accounting keys for replicas entries, as an array of * bch_replicas_usage entries. @@ -282,7 +315,7 @@ int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accountin */ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) { - struct bch_accounting_mem *acc = &c->accounting[0]; + struct bch_accounting_mem *acc = &c->accounting; int ret = 0; darray_init(usage); @@ -297,7 +330,7 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) if (!accounting_to_replicas(&u.r.r, i->pos)) continue; - bch2_accounting_mem_read(c, i->pos, &u.r.sectors, 1); + bch2_accounting_mem_read_counters(acc, i - acc->k.data, &u.r.sectors, 1, false); ret = darray_make_room(usage, replicas_usage_bytes(&u.r)); if (ret) @@ -316,7 +349,7 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned accounting_types_mask) { - struct bch_accounting_mem *acc = &c->accounting[0]; + struct bch_accounting_mem *acc = &c->accounting; int ret = 0; darray_init(out_buf); @@ -338,7 +371,8 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc bkey_accounting_init((void *) &darray_top(*out_buf)); set_bkey_val_u64s(&a_out->k, i->nr_counters); a_out->k.p = i->pos; - bch2_accounting_mem_read(c, i->pos, a_out->v.d, i->nr_counters); + bch2_accounting_mem_read_counters(acc, i - acc->k.data, + a_out->v.d, i->nr_counters, false); if (!bch2_accounting_key_is_zero(accounting_i_to_s_c(a_out))) out_buf->nr += bkey_bytes(&a_out->k); @@ -353,7 +387,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c) { - struct bch_accounting_mem *acc = &c->accounting[0]; + struct bch_accounting_mem *acc = &c->accounting; percpu_down_read(&c->mark_lock); out->atomic++; @@ -365,7 +399,7 @@ void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c) bch2_accounting_key_to_text(out, &acc_k); u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(c, i, v, ARRAY_SIZE(v), false); + bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); prt_str(out, ":"); for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) @@ -377,81 +411,64 @@ void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c) percpu_up_read(&c->mark_lock); } -/* Ensures all counters in @src exist in @dst: */ -static int copy_counters(struct bch_accounting_mem *dst, - struct bch_accounting_mem *src) +static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc) { - unsigned orig_dst_k_nr = dst->k.nr; - unsigned dst_counters = dst->nr_counters; - - darray_for_each(src->k, i) - if (eytzinger0_find(dst->k.data, orig_dst_k_nr, sizeof(dst->k.data[0]), - accounting_pos_cmp, &i->pos) >= orig_dst_k_nr) { - if (darray_push(&dst->k, ((struct accounting_pos_offset) { - .pos = i->pos, - .offset = dst_counters, - .nr_counters = i->nr_counters }))) - goto err; - - dst_counters += i->nr_counters; - } - - if (dst->k.nr == orig_dst_k_nr) - return 0; - - u64 __percpu *new_counters = __alloc_percpu_gfp(dst_counters * sizeof(u64), - sizeof(u64), GFP_KERNEL); - if (!new_counters) - goto err; + darray_for_each(acc->k, e) { + free_percpu(e->v[gc]); + e->v[gc] = NULL; + } +} - preempt_disable(); - memcpy(this_cpu_ptr(new_counters), - bch2_acc_percpu_u64s(dst->v, dst->nr_counters), - dst->nr_counters * sizeof(u64)); - preempt_enable(); +int bch2_gc_accounting_start(struct bch_fs *c) +{ + struct bch_accounting_mem *acc = &c->accounting; + int ret = 0; - free_percpu(dst->v); - dst->v = new_counters; - dst->nr_counters = dst_counters; + percpu_down_write(&c->mark_lock); + darray_for_each(acc->k, e) { + e->v[1] = __alloc_percpu_gfp(e->nr_counters * sizeof(u64), + sizeof(u64), GFP_KERNEL); + if (!e->v[1]) { + bch2_accounting_free_counters(acc, true); + ret = -BCH_ERR_ENOMEM_disk_accounting; + break; + } + } - eytzinger0_sort(dst->k.data, dst->k.nr, sizeof(dst->k.data[0]), accounting_pos_cmp, NULL); + acc->gc_running = !ret; + percpu_up_write(&c->mark_lock); - return 0; -err: - dst->k.nr = orig_dst_k_nr; - return -BCH_ERR_ENOMEM_disk_accounting; + return ret; } -int bch2_accounting_gc_done(struct bch_fs *c) +int bch2_gc_accounting_done(struct bch_fs *c) { - struct bch_accounting_mem *dst = &c->accounting[0]; - struct bch_accounting_mem *src = &c->accounting[1]; + struct bch_accounting_mem *acc = &c->accounting; struct btree_trans *trans = bch2_trans_get(c); struct printbuf buf = PRINTBUF; + struct bpos pos = POS_MIN; int ret = 0; percpu_down_write(&c->mark_lock); + while (1) { + unsigned idx = eytzinger0_find_ge(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &pos); - ret = copy_counters(dst, src) ?: - copy_counters(src, dst); - if (ret) - goto err; - - BUG_ON(dst->k.nr != src->k.nr); + if (idx >= acc->k.nr) + break; - for (unsigned i = 0; i < src->k.nr; i++) { - BUG_ON(src->k.data[i].nr_counters != dst->k.data[i].nr_counters); - BUG_ON(!bpos_eq(dst->k.data[i].pos, src->k.data[i].pos)); + struct accounting_mem_entry *e = acc->k.data + idx; + pos = bpos_successor(e->pos); struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, src->k.data[i].pos); + bpos_to_disk_accounting_pos(&acc_k, e->pos); - unsigned nr = src->k.data[i].nr_counters; u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(c, i, dst_v, nr, false); - bch2_accounting_mem_read_counters(c, i, src_v, nr, true); + unsigned nr = e->nr_counters; + bch2_accounting_mem_read_counters(acc, idx, dst_v, nr, false); + bch2_accounting_mem_read_counters(acc, idx, src_v, nr, true); if (memcmp(dst_v, src_v, nr * sizeof(u64))) { printbuf_reset(&buf); @@ -470,8 +487,10 @@ int bch2_accounting_gc_done(struct bch_fs *c) src_v[j] -= dst_v[j]; if (fsck_err(trans, accounting_mismatch, "%s", buf.buf)) { + percpu_up_write(&c->mark_lock); ret = commit_do(trans, NULL, NULL, 0, bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); + percpu_down_write(&c->mark_lock); if (ret) goto err; @@ -535,7 +554,7 @@ fsck_err: */ int bch2_accounting_read(struct bch_fs *c) { - struct bch_accounting_mem *acc = &c->accounting[0]; + struct bch_accounting_mem *acc = &c->accounting; int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, @@ -595,7 +614,7 @@ int bch2_accounting_read(struct bch_fs *c) bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(c, i, v, ARRAY_SIZE(v), false); + bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); switch (k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: @@ -748,15 +767,20 @@ void bch2_verify_accounting_clean(struct bch_fs *c) WARN_ON(mismatch); } -void bch2_accounting_free(struct bch_accounting_mem *acc) +void bch2_accounting_gc_free(struct bch_fs *c) { - darray_exit(&acc->k); - free_percpu(acc->v); - acc->v = NULL; - acc->nr_counters = 0; + lockdep_assert_held(&c->mark_lock); + + struct bch_accounting_mem *acc = &c->accounting; + + bch2_accounting_free_counters(acc, true); + acc->gc_running = false; } void bch2_fs_accounting_exit(struct bch_fs *c) { - bch2_accounting_free(&c->accounting[0]); + struct bch_accounting_mem *acc = &c->accounting; + + bch2_accounting_free_counters(acc, false); + darray_exit(&acc->k); } |