author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-02-14 00:07:38 -0500
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-04-02 16:02:54 -0400
commit | ada96472a3e99e3da084b01877c4aaa554d5f744
tree | bfa8c8b29d7f9dd810639c758300d40ab2d51b6e
parent | 426f33cb342b1dba162e39db928634eb1ba390ba
bcachefs: Kill struct bucket_mark
This switches struct bucket to using a lock instead of cmpxchg. Now that
the protected members no longer need to fit into a u64, we can expand the
sector counts to 32 bits.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
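As a quick orientation before the per-file hunks: the new scheme gives each struct bucket a small lock byte and 32-bit sector counts, and updates are made between bucket_lock()/bucket_unlock() instead of inside a packed-u64 cmpxchg loop. The sketch below is a rough userspace rendering of the buckets_types.h and buckets.h hunks further down, not the in-tree code: it uses C11 atomics in place of the kernel's xchg()/smp_store_release()/cpu_relax(), and bucket_add_dirty() is a made-up helper added only to show the update pattern.

```c
#include <stdatomic.h>
#include <stdint.h>

/*
 * Approximate post-patch layout of struct bucket: a lock byte plus 32-bit
 * sector counts, instead of counters packed into an atomic64 bucket_mark.
 * Initialise .lock with ATOMIC_FLAG_INIT before first use.
 */
struct bucket {
	atomic_flag	lock;
	uint8_t		gen_valid:1;
	uint8_t		data_type:7;
	uint8_t		gen;
	uint8_t		stripe_redundancy;
	uint32_t	stripe;
	uint32_t	dirty_sectors;	/* was a u16 inside bucket_mark */
	uint32_t	cached_sectors;	/* was a u16 inside bucket_mark */
};

/* Test-and-set spinlock; the kernel version spins on xchg() + cpu_relax() */
static inline void bucket_lock(struct bucket *b)
{
	while (atomic_flag_test_and_set_explicit(&b->lock, memory_order_acquire))
		;
}

/* Release store, the userspace analogue of smp_store_release(&b->lock, 0) */
static inline void bucket_unlock(struct bucket *b)
{
	atomic_flag_clear_explicit(&b->lock, memory_order_release);
}

/* Made-up helper showing the pattern that replaces the bucket_cmpxchg() loop */
static void bucket_add_dirty(struct bucket *g, uint32_t sectors)
{
	bucket_lock(g);
	/* the real marking paths also snapshot *g before and after this point
	 * so the old/new pair can feed device usage accounting */
	g->dirty_sectors += sectors;	/* plain 32-bit add, no u16 clamping */
	bucket_unlock(g);
}
```

Giving up the lock-free cmpxchg costs a short spin when two updates race on the same bucket, but it is what frees the protected fields from having to fit in 64 bits, which is why the u16 counters and the saturating checked_add() macro can go away.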
-rw-r--r-- | fs/bcachefs/bcachefs_format.h |   4
-rw-r--r-- | fs/bcachefs/btree_gc.c        |  99
-rw-r--r-- | fs/bcachefs/buckets.c         | 126
-rw-r--r-- | fs/bcachefs/buckets.h         |  24
-rw-r--r-- | fs/bcachefs/buckets_types.h   |  32
-rw-r--r-- | fs/bcachefs/movinggc.c        |   2
6 files changed, 125 insertions, 162 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 5ece1492d76a..bb54ac175b69 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -883,8 +883,8 @@ struct bch_alloc_v2 {
 #define BCH_ALLOC_FIELDS_V2() \
 	x(read_time, 64) \
 	x(write_time, 64) \
-	x(dirty_sectors, 16) \
-	x(cached_sectors, 16) \
+	x(dirty_sectors, 32) \
+	x(cached_sectors, 32) \
 	x(stripe, 32) \
 	x(stripe_redundancy, 8)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 952051b07e21..5c54a0ca681c 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -571,37 +571,37 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
 		if (!p.ptr.cached) {
-			g->_mark.gen = p.ptr.gen;
 			g->gen_valid = true;
+			g->gen = p.ptr.gen;
 		} else {
 			do_update = true;
 		}
 	}
-	if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
+	if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, c,
 			"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
 			"while marking %s",
 			p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
 			bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-			p.ptr.gen, g->mark.gen,
+			p.ptr.gen, g->gen,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
 		if (!p.ptr.cached) {
-			g->_mark.gen = p.ptr.gen;
 			g->gen_valid = true;
-			g->_mark.data_type = 0;
-			g->_mark.dirty_sectors = 0;
-			g->_mark.cached_sectors = 0;
+			g->gen = p.ptr.gen;
+			g->data_type = 0;
+			g->dirty_sectors = 0;
+			g->cached_sectors = 0;
 			set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
 		} else {
 			do_update = true;
 		}
 	}
-	if (fsck_err_on(gen_cmp(g->mark.gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
+	if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
 			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
 			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->mark.gen,
+			p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
 			bch2_data_types[ptr_data_type(k->k, &p.ptr)],
 			p.ptr.gen,
 			(printbuf_reset(&buf),
@@ -609,30 +609,30 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 		do_update = true;
 	if (fsck_err_on(!p.ptr.cached &&
-			gen_cmp(p.ptr.gen, g->mark.gen) < 0, c,
+			gen_cmp(p.ptr.gen, g->gen) < 0, c,
 			"bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
 			"while marking %s",
 			p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
 			bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-			p.ptr.gen, g->mark.gen,
+			p.ptr.gen, g->gen,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
 		do_update = true;
-	if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen)
+	if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
 		continue;
-	if (fsck_err_on(g->mark.data_type &&
-			g->mark.data_type != data_type, c,
+	if (fsck_err_on(g->data_type &&
+			g->data_type != data_type, c,
 			"bucket %u:%zu different types of data in same bucket: %s, %s\n"
 			"while marking %s",
 			p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-			bch2_data_types[g->mark.data_type],
+			bch2_data_types[g->data_type],
 			bch2_data_types[data_type],
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
 		if (data_type == BCH_DATA_btree) {
-			g->_mark.data_type = data_type;
+			g->data_type = data_type;
 			set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
 		} else {
 			do_update = true;
@@ -692,7 +692,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 		struct bucket *g = PTR_GC_BUCKET(ca, ptr);
-		ptr->gen = g->mark.gen;
+		ptr->gen = g->gen;
 	}
 } else {
 	bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
@@ -701,12 +701,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 		enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
 		(ptr->cached &&
-		 (!g->gen_valid || gen_cmp(ptr->gen, g->mark.gen) > 0)) ||
+		 (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
 		(!ptr->cached &&
-		 gen_cmp(ptr->gen, g->mark.gen) < 0) ||
-		gen_cmp(g->mark.gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
-		(g->mark.data_type &&
-		 g->mark.data_type != data_type);
+		 gen_cmp(ptr->gen, g->gen) < 0) ||
+		gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
+		(g->data_type &&
+		 g->data_type != data_type);
 	}));
again:
 	ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
@@ -1315,9 +1315,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
-	struct bucket *g;
+	struct bucket gc;
 	struct bkey_s_c k;
-	struct bkey_alloc_unpacked old_u, new_u, gc_u;
+	struct bkey_alloc_unpacked old_u, new_u;
 	struct bkey_alloc_buf *a;
 	int ret;
@@ -1329,37 +1329,27 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	old_u = new_u = bch2_alloc_unpack(k);
 	percpu_down_read(&c->mark_lock);
-	g = gc_bucket(ca, iter->pos.offset);
-	gc_u = (struct bkey_alloc_unpacked) {
-		.dev = iter->pos.inode,
-		.bucket = iter->pos.offset,
-		.gen = g->mark.gen,
-		.data_type = g->mark.data_type,
-		.dirty_sectors = g->mark.dirty_sectors,
-		.cached_sectors = g->mark.cached_sectors,
-		.stripe = g->stripe,
-		.stripe_redundancy = g->stripe_redundancy,
-	};
+	gc = *gc_bucket(ca, iter->pos.offset);
 	percpu_up_read(&c->mark_lock);
 	if (metadata_only &&
-	    gc_u.data_type != BCH_DATA_sb &&
-	    gc_u.data_type != BCH_DATA_journal &&
-	    gc_u.data_type != BCH_DATA_btree)
+	    gc.data_type != BCH_DATA_sb &&
+	    gc.data_type != BCH_DATA_journal &&
+	    gc.data_type != BCH_DATA_btree)
 		return 0;
-	if (gen_after(old_u.gen, gc_u.gen))
+	if (gen_after(old_u.gen, gc.gen))
 		return 0;
 #define copy_bucket_field(_f) \
-	if (fsck_err_on(new_u._f != gc_u._f, c, \
+	if (fsck_err_on(new_u._f != gc._f, c, \
 			"bucket %llu:%llu gen %u data type %s has wrong " #_f \
 			": got %u, should be %u", \
 			iter->pos.inode, iter->pos.offset, \
-			new_u.gen, \
-			bch2_data_types[new_u.data_type], \
-			new_u._f, gc_u._f)) \
-		new_u._f = gc_u._f; \
+			gc.gen, \
+			bch2_data_types[gc.data_type], \
+			new_u._f, gc._f)) \
+		new_u._f = gc._f; \
 	copy_bucket_field(gen);
 	copy_bucket_field(data_type);
@@ -1455,17 +1445,16 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 		g = gc_bucket(ca, k.k->p.offset);
 		u = bch2_alloc_unpack(k);
-		g->_mark.gen = u.gen;
-		g->gen_valid = 1;
+		g->gen_valid = 1;
+		g->gen = u.gen;
 		if (metadata_only &&
 		    (u.data_type == BCH_DATA_user ||
 		     u.data_type == BCH_DATA_cached ||
 		     u.data_type == BCH_DATA_parity)) {
-			g->_mark.data_type = u.data_type;
-			g->_mark.dirty_sectors = u.dirty_sectors;
-			g->_mark.cached_sectors = u.cached_sectors;
-			g->_mark.stripe = u.stripe != 0;
+			g->data_type = u.data_type;
+			g->dirty_sectors = u.dirty_sectors;
+			g->cached_sectors = u.cached_sectors;
 			g->stripe = u.stripe;
 			g->stripe_redundancy = u.stripe_redundancy;
 		}
@@ -1491,12 +1480,12 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
 		for_each_bucket(g, buckets) {
 			if (metadata_only &&
-			    (g->mark.data_type == BCH_DATA_user ||
-			     g->mark.data_type == BCH_DATA_cached ||
-			     g->mark.data_type == BCH_DATA_parity))
+			    (g->data_type == BCH_DATA_user ||
+			     g->data_type == BCH_DATA_cached ||
+			     g->data_type == BCH_DATA_parity))
 				continue;
-			g->_mark.dirty_sectors = 0;
-			g->_mark.cached_sectors = 0;
+			g->dirty_sectors = 0;
+			g->cached_sectors = 0;
 		}
 	};
 }
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 8ef732656f97..fbce6cdf4cf8 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -346,7 +346,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 }
 static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
-				    struct bucket_mark old, struct bucket_mark new,
+				    struct bucket old, struct bucket new,
 				    u64 journal_seq, bool gc)
 {
 	struct bkey_alloc_unpacked old_a = {
@@ -581,20 +581,19 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	bch2_dev_usage_update(c, ca, old_u, new_u, journal_seq, gc);
 	if (gc) {
-		struct bucket_mark old_m, m;
 		struct bucket *g = gc_bucket(ca, new_u.bucket);
-		old_m = bucket_cmpxchg(g, m, ({
-			m.gen = new_u.gen;
-			m.data_type = new_u.data_type;
-			m.dirty_sectors = new_u.dirty_sectors;
-			m.cached_sectors = new_u.cached_sectors;
-			m.stripe = new_u.stripe != 0;
-		}));
+		bucket_lock(g);
 		g->gen_valid = 1;
+		g->gen = new_u.gen;
+		g->data_type = new_u.data_type;
 		g->stripe = new_u.stripe;
 		g->stripe_redundancy = new_u.stripe_redundancy;
+		g->dirty_sectors = new_u.dirty_sectors;
+		g->cached_sectors = new_u.cached_sectors;
+
+		bucket_unlock(g);
 	}
 	percpu_up_read(&c->mark_lock);
@@ -620,23 +619,12 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	return 0;
 }
-#define checked_add(a, b) \
-({ \
-	unsigned _res = (unsigned) (a) + (b); \
-	bool overflow = _res > U16_MAX; \
-	if (overflow) \
-		_res = U16_MAX; \
-	(a) = _res; \
-	overflow; \
-})
-
 void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 			       size_t b, enum bch_data_type data_type,
 			       unsigned sectors, struct gc_pos pos,
 			       unsigned flags)
 {
-	struct bucket *g;
-	struct bucket_mark old, new;
+	struct bucket old, new, *g;
 	bool overflow;
 	BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -651,10 +639,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 	percpu_down_read(&c->mark_lock);
 	g = gc_bucket(ca, b);
-	old = bucket_cmpxchg(g, new, ({
-		new.data_type = data_type;
-		overflow = checked_add(new.dirty_sectors, sectors);
-	}));
+
+	bucket_lock(g);
+	old = *g;
+
+	g->data_type = data_type;
+	g->dirty_sectors += sectors;
+	overflow = g->dirty_sectors < sectors;
+
+	new = *g;
+	bucket_unlock(g);
 	bch2_fs_inconsistent_on(old.data_type &&
 				old.data_type != data_type, c,
@@ -688,7 +682,7 @@ static int check_bucket_ref(struct bch_fs *c,
 			    const struct bch_extent_ptr *ptr,
 			    s64 sectors, enum bch_data_type ptr_data_type,
 			    u8 b_gen, u8 bucket_data_type,
-			    u16 dirty_sectors, u16 cached_sectors)
+			    u32 dirty_sectors, u32 cached_sectors)
 {
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 	size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
@@ -756,7 +750,7 @@ static int check_bucket_ref(struct bch_fs *c,
 		goto err;
 	}
-	if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
+	if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
 			"while marking %s",
@@ -787,8 +781,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 	s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
 	const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-	struct bucket *g;
-	struct bucket_mark new, old;
+	struct bucket old, new, *g;
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
@@ -800,33 +793,37 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 	buf.atomic++;
 	g = PTR_GC_BUCKET(ca, ptr);
-	if (g->mark.dirty_sectors ||
+	if (g->dirty_sectors ||
 	    (g->stripe && g->stripe != k.k->p.offset)) {
 		bch2_fs_inconsistent(c,
 			      "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
-			      ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
+			      ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
 			      (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
 		ret = -EINVAL;
 		goto err;
 	}
-	old = bucket_cmpxchg(g, new, ({
-		ret = check_bucket_ref(c, k, ptr, sectors, data_type,
-				       new.gen, new.data_type,
-				       new.dirty_sectors, new.cached_sectors);
-		if (ret)
-			goto err;
+	bucket_lock(g);
+	old = *g;
-		new.dirty_sectors += sectors;
-		if (data_type)
-			new.data_type = data_type;
+	ret = check_bucket_ref(c, k, ptr, sectors, data_type,
+			       new.gen, new.data_type,
+			       new.dirty_sectors, new.cached_sectors);
+	if (ret) {
+		bucket_unlock(g);
+		goto err;
+	}
-		new.stripe = true;
-	}));
+	new.dirty_sectors += sectors;
+	if (data_type)
+		new.data_type = data_type;
 	g->stripe = k.k->p.offset;
 	g->stripe_redundancy = s->nr_redundant;
+	new = *g;
+	bucket_unlock(g);
+	bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
 err:
 	percpu_up_read(&c->mark_lock);
@@ -839,9 +836,9 @@ static int __mark_pointer(struct btree_trans *trans,
 			  const struct bch_extent_ptr *ptr,
 			  s64 sectors, enum bch_data_type ptr_data_type,
 			  u8 bucket_gen, u8 *bucket_data_type,
-			  u16 *dirty_sectors, u16 *cached_sectors)
+			  u32 *dirty_sectors, u32 *cached_sectors)
 {
-	u16 *dst_sectors = !ptr->cached
+	u32 *dst_sectors = !ptr->cached
 		? dirty_sectors : cached_sectors;
 	int ret = check_bucket_ref(trans->c, k, ptr, sectors, ptr_data_type,
@@ -865,11 +862,9 @@ static int bch2_mark_pointer(struct btree_trans *trans,
 {
 	u64 journal_seq = trans->journal_res.seq;
 	struct bch_fs *c = trans->c;
-	struct bucket_mark old, new;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-	struct bucket *g;
+	struct bucket old, new, *g;
 	u8 bucket_data_type;
-	u64 v;
 	int ret = 0;
 	BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -877,28 +872,25 @@ static int bch2_mark_pointer(struct btree_trans *trans,
 	percpu_down_read(&c->mark_lock);
 	g = PTR_GC_BUCKET(ca, &p.ptr);
-	v = atomic64_read(&g->_mark.v);
-	do {
-		new.v.counter = old.v.counter = v;
-		bucket_data_type = new.data_type;
-
-		ret = __mark_pointer(trans, k, &p.ptr, sectors,
-				     data_type, new.gen,
-				     &bucket_data_type,
-				     &new.dirty_sectors,
-				     &new.cached_sectors);
-		if (ret)
-			goto err;
+	bucket_lock(g);
+	old = *g;
-		new.data_type = bucket_data_type;
+	bucket_data_type = g->data_type;
-		if (flags & BTREE_TRIGGER_NOATOMIC) {
-			g->_mark = new;
-			break;
-		}
-	} while ((v = atomic64_cmpxchg(&g->_mark.v,
-			       old.v.counter,
-			       new.v.counter)) != old.v.counter);
+	ret = __mark_pointer(trans, k, &p.ptr, sectors,
+			     data_type, g->gen,
+			     &bucket_data_type,
+			     &g->dirty_sectors,
+			     &g->cached_sectors);
+	if (ret) {
+		bucket_unlock(g);
+		goto err;
+	}
+
+	g->data_type = bucket_data_type;
+
+	new = *g;
+	bucket_unlock(g);
 	bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
 err:
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 7f7fdd024868..4a3d6bf1e3ef 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -15,20 +15,16 @@
 	for (_b = (_buckets)->b + (_buckets)->first_bucket; \
 	     _b < (_buckets)->b + (_buckets)->nbuckets; _b++)
-#define bucket_cmpxchg(g, new, expr) \
-({ \
-	struct bucket *_g = g; \
-	u64 _v = atomic64_read(&(g)->_mark.v); \
-	struct bucket_mark _old; \
- \
-	do { \
-		(new).v.counter = _old.v.counter = _v; \
-		expr; \
-	} while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \
-			       _old.v.counter, \
-			       (new).v.counter)) != _old.v.counter);\
-	_old; \
-})
+static inline void bucket_unlock(struct bucket *b)
+{
+	smp_store_release(&b->lock, 0);
+}
+
+static inline void bucket_lock(struct bucket *b)
+{
+	while (xchg(&b->lock, 1))
+		cpu_relax();
+}
 static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
 {
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index f90b130fbb73..e79a33795bf9 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -7,29 +7,15 @@
 #define BUCKET_JOURNAL_SEQ_BITS 16
-struct bucket_mark {
-	union {
-		atomic64_t v;
-
-		struct {
-			u8 gen;
-			u8 data_type:3,
-			   stripe:1;
-			u16 dirty_sectors;
-			u16 cached_sectors;
-		};
-	};
-};
-
 struct bucket {
-	union {
-		struct bucket_mark _mark;
-		const struct bucket_mark mark;
-	};
-
-	unsigned gen_valid:1;
-	u8 stripe_redundancy;
-	u32 stripe;
+	u8 lock;
+	u8 gen_valid:1;
+	u8 data_type:7;
+	u8 gen;
+	u8 stripe_redundancy;
+	u32 stripe;
+	u32 dirty_sectors;
+	u32 cached_sectors;
 };
 struct bucket_array {
@@ -108,7 +94,7 @@ struct copygc_heap_entry {
 	u8 dev;
 	u8 gen;
 	u8 replicas;
-	u16 fragmentation;
+	u32 fragmentation;
 	u32 sectors;
 	u64 offset;
 };
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 0fb60d8581a7..466975a3151f 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -153,7 +153,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
 			.dev = iter.pos.inode,
 			.gen = u.gen,
 			.replicas = 1 + u.stripe_redundancy,
-			.fragmentation = u.dirty_sectors * (1U << 15)
+			.fragmentation = (u64) u.dirty_sectors * (1ULL << 31)
 				/ ca->mi.bucket_size,
 			.sectors = u.dirty_sectors,
 			.offset = bucket_to_sector(ca, iter.pos.offset),
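A small knock-on change shows up in the movinggc.c hunk above: with copygc_heap_entry.fragmentation widened from u16 to u32, the fixed-point scale grows from 2^15 to 2^31, so the product has to be formed in 64-bit arithmetic before the divide. Below is a minimal sketch of that calculation, assuming dirty_sectors never exceeds bucket_size; fragmentation_of() is an illustrative name, not a function from the patch.

```c
#include <stdint.h>

/*
 * Fraction of a bucket that is dirty, as a fixed-point value in [0, 2^31].
 * dirty_sectors * 2^31 overflows 32 bits for any count above 1, which is
 * what the (u64) and 1ULL casts in the patch guard against.
 */
static uint32_t fragmentation_of(uint32_t dirty_sectors, uint32_t bucket_size)
{
	return (uint64_t)dirty_sectors * (1ULL << 31) / bucket_size;
}
```

With the old u16 field the same ratio was scaled by 2^15, so the multiply still fit in 32-bit unsigned arithmetic.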