author     Kent Overstreet <kent.overstreet@gmail.com>   2018-07-24 14:54:39 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>   2018-08-01 14:35:34 -0400
commit     31efffa2973963aa73a29dbf4c35abf08d66f991 (patch)
tree       2ed8bdacc06438987a90921c14cc0d38cae52342
parent     478637ce4bfc36c9406650da4d1a1377d2ec76e4 (diff)
bcachefs: Account for internal fragmentation better
-rw-r--r--   fs/bcachefs/btree_gc.c               |   3
-rw-r--r--   fs/bcachefs/btree_update_interior.c  |   3
-rw-r--r--   fs/bcachefs/buckets.c                | 112
-rw-r--r--   fs/bcachefs/buckets.h                |   2
-rw-r--r--   fs/bcachefs/buckets_types.h          |   4
-rw-r--r--   fs/bcachefs/chardev.c                |   4
-rw-r--r--   fs/bcachefs/fs.c                     |   9
-rw-r--r--   fs/bcachefs/sysfs.c                  |  14
8 files changed, 92 insertions, 59 deletions
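
The core of the change, per the comment added in __fs_usage_sum() below: superblock and journal space is now accounted in whole buckets rather than sectors, so internal fragmentation inside those buckets is charged as used space instead of being reported as free. Data and reservations stay sector-granular, weighted by replica count. A minimal standalone model of that accounting (illustration only, not bcachefs code — the enum, bucket size, and numbers here are invented):

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-ins for bcachefs' BCH_DATA_* and BCH_REPLICAS_MAX */
enum data_type { DATA_NONE, DATA_SB, DATA_JOURNAL, DATA_BTREE, DATA_USER, DATA_NR };
#define REPLICAS_MAX 4

struct fs_usage {
	struct {
		uint64_t data[DATA_NR];	/* sectors with i+1 replicas, per type */
	} replicas[REPLICAS_MAX];
	uint64_t buckets[DATA_NR];	/* sectors, rounded up to whole buckets */
};

/* Mirrors the patched __fs_usage_sum(): whole buckets for sb/journal
 * ("hidden"), replica-weighted sectors for everything else. */
static uint64_t sectors_used(const struct fs_usage *u)
{
	uint64_t hidden = 0, data = 0;
	unsigned i, j;

	for (j = DATA_SB; j < DATA_BTREE; j++)
		hidden += u->buckets[j];

	for (i = 0; i < REPLICAS_MAX; i++)
		for (j = DATA_BTREE; j < DATA_NR; j++)
			data += u->replicas[i].data[j] * (i + 1);

	return hidden + data;
}

int main(void)
{
	struct fs_usage u = { 0 };

	/* 100 journal sectors live in two 128-sector buckets ... */
	u.buckets[DATA_JOURNAL] = 2 * 128;
	/* ... plus 1000 sectors of 2x-replicated user data */
	u.replicas[1].data[DATA_USER] = 1000;

	/* prints 2256: 256 journal sectors (not 100) + 2000 user sectors */
	printf("used: %llu\n", (unsigned long long) sectors_used(&u));
	return 0;
}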
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0647686abc60..c5ff65ede6b4 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -490,7 +490,8 @@ static void bch2_gc_start(struct bch_fs *c)
 		struct bch_fs_usage *p =
 			per_cpu_ptr(c->usage_percpu, cpu);
 
-		memset(p->s, 0, sizeof(p->s));
+		memset(p->replicas, 0, sizeof(p->replicas));
+		memset(p->buckets, 0, sizeof(p->buckets));
 	}
 
 	percpu_up_write(&c->usage_lock);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 74f603fa4eb4..26659c132fde 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -183,7 +183,8 @@ found:
 	 */
 	replicas = bch2_extent_nr_dirty_ptrs(k);
 	if (replicas)
-		stats->s[replicas - 1].data[BCH_DATA_BTREE] -= c->opts.btree_node_size;
+		stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
+			c->opts.btree_node_size;
 
 	/*
 	 * We're dropping @k from the btree, but it's still live until the
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 519498b00d01..fc1ab5e3595b 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -72,6 +72,8 @@
 #include <linux/preempt.h>
 #include <trace/events/bcachefs.h>
 
+static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
+
 #ifdef DEBUG_BUCKETS
 
 #define lg_local_lock	lg_global_lock
@@ -83,18 +85,24 @@ static void bch2_fs_stats_verify(struct bch_fs *c)
 		__bch2_fs_usage_read(c);
 	unsigned i, j;
 
-	for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
-		for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++)
-			if ((s64) stats.s[i].data[j] < 0)
-				panic("replicas %u %s underflow: %lli\n",
+	for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
+		for (j = 0; j < ARRAY_SIZE(stats.replicas[i].data); j++)
+			if ((s64) stats.replicas[i].data[j] < 0)
+				panic("replicas %u %s sectors underflow: %lli\n",
 				      i + 1, bch_data_types[j],
-				      stats.s[i].data[j]);
+				      stats.replicas[i].data[j]);
 
-		if ((s64) stats.s[i].persistent_reserved < 0)
+		if ((s64) stats.replicas[i].persistent_reserved < 0)
 			panic("replicas %u reserved underflow: %lli\n",
-			      i + 1, stats.s[i].persistent_reserved);
+			      i + 1, stats.replicas[i].persistent_reserved);
 	}
 
+	for (j = 0; j < ARRAY_SIZE(stats.buckets); j++)
+		if ((s64) stats.buckets[j] < 0)
+			panic("%s buckets underflow: %lli\n",
+			      bch_data_types[j],
+			      stats.buckets[j]);
+
 	if ((s64) stats.online_reserved < 0)
 		panic("sectors_online_reserved underflow: %lli\n",
 		      stats.online_reserved);
@@ -237,6 +245,7 @@ bch2_fs_usage_read(struct bch_fs *c)
 }
 
 struct fs_usage_sum {
+	u64	hidden;
 	u64	data;
 	u64	reserved;
};
@@ -246,14 +255,21 @@ static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats)
 	struct fs_usage_sum sum = { 0 };
 	unsigned i, j;
 
-	for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
-		u64 a = 0;
+	/*
+	 * For superblock and journal we count bucket usage, not sector usage,
+	 * because any internal fragmentation should _not_ be counted as
+	 * free space:
+	 */
+	for (j = 1; j < BCH_DATA_BTREE; j++)
+		sum.hidden += stats.buckets[j];
 
-		for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++)
-			a += stats.s[i].data[j];
+	for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
+		for (j = BCH_DATA_BTREE;
+		     j < ARRAY_SIZE(stats.replicas[i].data);
+		     j++)
+			sum.data += stats.replicas[i].data[j] * (i + 1);
 
-		sum.data += a * (i + 1);
-		sum.reserved += stats.s[i].persistent_reserved * (i + 1);
+		sum.reserved += stats.replicas[i].persistent_reserved * (i + 1);
 	}
 
 	sum.reserved += stats.online_reserved;
@@ -269,14 +285,14 @@ static u64 reserve_factor(u64 r)
 
 static u64 avail_factor(u64 r)
 {
-	return (r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1;
+	return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
 }
 
-u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
+static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
 {
 	struct fs_usage_sum sum = __fs_usage_sum(stats);
 
-	return sum.data + reserve_factor(sum.reserved);
+	return sum.hidden + sum.data + reserve_factor(sum.reserved);
 }
 
 u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
@@ -284,9 +300,9 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
 	return min(c->capacity, __bch2_fs_sectors_used(c, stats));
 }
 
-u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
+static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
 {
-	return avail_factor(c->capacity - bch2_fs_sectors_used(c, stats));
+	return c->capacity - bch2_fs_sectors_used(c, stats);
 }
 
 static inline int is_unavailable_bucket(struct bucket_mark m)
@@ -322,9 +338,9 @@ static bool bucket_became_unavailable(struct bch_fs *c,
 }
 
 void bch2_fs_usage_apply(struct bch_fs *c,
-			struct bch_fs_usage *stats,
-			struct disk_reservation *disk_res,
-			struct gc_pos gc_pos)
+			 struct bch_fs_usage *stats,
+			 struct disk_reservation *disk_res,
+			 struct gc_pos gc_pos)
 {
 	struct fs_usage_sum sum = __fs_usage_sum(*stats);
 	s64 added = sum.data + sum.reserved;
@@ -356,6 +372,7 @@ void bch2_fs_usage_apply(struct bch_fs *c,
 }
 
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
+				  struct bch_fs_usage *stats,
 				  struct bucket_mark old, struct bucket_mark new)
 {
 	struct bch_dev_usage *dev_usage;
@@ -372,6 +389,9 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 			      bch2_data_types[new.data_type]);
 	}
 
+	stats->buckets[bucket_type(old)] -= ca->mi.bucket_size;
+	stats->buckets[bucket_type(new)] += ca->mi.bucket_size;
+
 	dev_usage = this_cpu_ptr(ca->usage_percpu);
 
 	dev_usage->buckets[bucket_type(old)]--;
@@ -395,17 +415,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 	bch2_dev_stats_verify(ca);
 }
 
-#define bucket_data_cmpxchg(c, ca, g, new, expr)			\
+#define bucket_data_cmpxchg(c, ca, stats, g, new, expr)			\
 ({									\
 	struct bucket_mark _old = bucket_cmpxchg(g, new, expr);		\
 									\
-	bch2_dev_usage_update(c, ca, _old, new);			\
+	bch2_dev_usage_update(c, ca, stats, _old, new);			\
 	_old;								\
 })
 
 void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 			    size_t b, struct bucket_mark *old)
 {
+	struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
 	struct bucket *g;
 	struct bucket_mark new;
@@ -413,7 +434,7 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 	g = bucket(ca, b);
 
-	*old = bucket_data_cmpxchg(c, ca, g, new, ({
+	*old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
 		BUG_ON(!is_available_bucket(new));
 
 		new.owned_by_allocator	= 1;
@@ -432,6 +453,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
 			    size_t b, bool owned_by_allocator,
 			    struct gc_pos pos, unsigned flags)
 {
+	struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
 	struct bucket *g;
 	struct bucket_mark old, new;
@@ -442,7 +464,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
 	    gc_will_visit(c, pos))
 		return;
 
-	old = bucket_data_cmpxchg(c, ca, g, new, ({
+	old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
 		new.owned_by_allocator	= owned_by_allocator;
 	}));
@@ -462,10 +484,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 			       unsigned sectors, struct gc_pos pos,
 			       unsigned flags)
 {
+	struct bch_fs_usage *stats;
 	struct bucket *g;
 	struct bucket_mark old, new;
 
-	BUG_ON(!type);
+	BUG_ON(type != BCH_DATA_SB &&
+	       type != BCH_DATA_JOURNAL);
 
 	if (likely(c)) {
 		percpu_rwsem_assert_held(&c->usage_lock);
@@ -476,14 +500,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 	}
 
 	rcu_read_lock();
+	stats = this_cpu_ptr(c->usage_percpu);
 
 	g = bucket(ca, b);
-	old = bucket_data_cmpxchg(c, ca, g, new, ({
+	old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
 		new.data_type	= type;
 		checked_add(new.dirty_sectors, sectors);
-		new.dirty_sectors += sectors;
 	}));
 
+	stats->replicas[0].data[type] += sectors;
+
 	rcu_read_unlock();
 
 	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
@@ -585,7 +611,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
 			      old.v.counter,
 			      new.v.counter)) != old.v.counter);
 
-	bch2_dev_usage_update(c, ca, old, new);
+	bch2_dev_usage_update(c, ca, stats, old, new);
 
 	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
 	       bucket_became_unavailable(c, old, new));
@@ -597,6 +623,10 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		   struct bch_fs_usage *stats,
 		   u64 journal_seq, unsigned flags)
 {
+	unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
+
+	BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
+
 	/*
 	 * synchronization w.r.t. GC:
 	 *
@@ -639,32 +669,22 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
 		const struct bch_extent_ptr *ptr;
 		struct bch_extent_crc_unpacked crc;
-		unsigned replicas = 0;
 
 		BUG_ON(!sectors);
 
-		extent_for_each_ptr_crc(e, ptr, crc) {
+		extent_for_each_ptr_crc(e, ptr, crc)
 			bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
 					  stats, journal_seq, flags);
-			replicas += !ptr->cached;
-		}
 
-		if (replicas) {
-			BUG_ON(replicas - 1 > ARRAY_SIZE(stats->s));
-			stats->s[replicas - 1].data[data_type] += sectors;
-		}
+		if (replicas)
+			stats->replicas[replicas - 1].data[data_type] += sectors;
 		break;
 	}
-	case BCH_RESERVATION: {
-		struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
-
-		if (r.v->nr_replicas) {
-			BUG_ON(r.v->nr_replicas - 1 > ARRAY_SIZE(stats->s));
-			stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors;
-		}
+	case BCH_RESERVATION:
+		if (replicas)
+			stats->replicas[replicas - 1].persistent_reserved += sectors;
 		break;
 	}
-	}
 	percpu_up_read_preempt_enable(&c->usage_lock);
 }
@@ -677,7 +697,7 @@ static u64 __recalc_sectors_available(struct bch_fs *c)
 	for_each_possible_cpu(cpu)
 		per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;
 
-	return bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
+	return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c)));
 }
 
 /* Used by gc when it's starting: */
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 4116cd5f9e26..ff86d23e15e4 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -172,9 +172,7 @@ struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *);
 void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
 			 struct disk_reservation *, struct gc_pos);
 
-u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
 u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
-u64 bch2_fs_sectors_free(struct bch_fs *, struct bch_fs_usage);
 
 static inline bool is_available_bucket(struct bucket_mark mark)
 {
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 2f23d02b9e22..7ef544b23727 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -68,7 +68,9 @@ struct bch_fs_usage {
 	struct {
 		u64	data[BCH_DATA_NR];
 		u64	persistent_reserved;
-	} s[BCH_REPLICAS_MAX];
+	} replicas[BCH_REPLICAS_MAX];
+
+	u64	buckets[BCH_DATA_NR];
 };
 
 /*
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 2182ae15f01f..c18079f9c0cb 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -403,10 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
 
 		for (i = 0; i < BCH_REPLICAS_MAX; i++) {
 			dst.persistent_reserved[i] =
-				src.s[i].persistent_reserved;
+				src.replicas[i].persistent_reserved;
 
 			for (j = 0; j < BCH_DATA_NR; j++)
-				dst.sectors[j][i] = src.s[i].data[j];
+				dst.sectors[j][i] = src.replicas[i].data[j];
 		}
 
 		ret = copy_to_user(&user_arg->fs, &dst, sizeof(dst));
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 8cfc760a87ba..21015137716c 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1343,13 +1343,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct bch_fs *c = sb->s_fs_info;
+	struct bch_fs_usage usage = bch2_fs_usage_read(c);
+	u64 hidden_metadata = usage.buckets[BCH_DATA_SB] +
+		usage.buckets[BCH_DATA_JOURNAL];
+	unsigned shift = sb->s_blocksize_bits - 9;
 	u64 fsid;
 
 	buf->f_type	= BCACHEFS_STATFS_MAGIC;
 	buf->f_bsize	= sb->s_blocksize;
-	buf->f_blocks	= c->capacity >> PAGE_SECTOR_SHIFT;
-	buf->f_bfree	= bch2_fs_sectors_free(c, bch2_fs_usage_read(c)) >>
-		PAGE_SECTOR_SHIFT;
+	buf->f_blocks	= (c->capacity - hidden_metadata) >> shift;
+	buf->f_bfree	= (c->capacity - bch2_fs_sectors_used(c, usage)) >> shift;
 	buf->f_bavail	= buf->f_bfree;
 	buf->f_files	= atomic_long_read(&c->nr_inodes);
 	buf->f_ffree	= U64_MAX;
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 85e885638a71..f68e79501547 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -237,7 +237,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 			 "capacity:\t\t%llu\n",
 			 c->capacity);
 
-	for (replicas = 0; replicas < ARRAY_SIZE(stats.s); replicas++) {
+	for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) {
 		out += scnprintf(out, end - out,
 				 "%u replicas:\n",
 				 replicas + 1);
@@ -246,12 +246,20 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 			out += scnprintf(out, end - out,
 					 "\t%s:\t\t%llu\n",
 					 bch2_data_types[type],
-					 stats.s[replicas].data[type]);
+					 stats.replicas[replicas].data[type]);
 		out += scnprintf(out, end - out,
 				 "\treserved:\t%llu\n",
-				 stats.s[replicas].persistent_reserved);
+				 stats.replicas[replicas].persistent_reserved);
 	}
 
+	out += scnprintf(out, end - out, "bucket usage\n");
+
+	for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
+		out += scnprintf(out, end - out,
+				 "\t%s:\t\t%llu\n",
+				 bch2_data_types[type],
+				 stats.buckets[type]);
+
 	out += scnprintf(out, end - out,
 			 "online reserved:\t%llu\n",
 			 stats.online_reserved);
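
Two of the hunks above are subtle enough to deserve a worked example. The avail_factor() change is an operator-precedence fix: in the old expression the division bound tighter than the trailing + 1, so the function returned r + 1 and never held back any slack. The patch also moves where avail_factor() is applied, from bch2_fs_sectors_free() to __recalc_sectors_available(), so statfs now sees raw free sectors while the reservation path keeps the slack. A standalone demonstration (the RESERVE_FACTOR value of 6 is an assumption for illustration, not taken from this diff):

#include <stdio.h>
#include <stdint.h>

#define RESERVE_FACTOR 6	/* assumed value, for illustration only */

static uint64_t avail_factor_old(uint64_t r)
{
	/* parses as ((r << 6) / (1 << 6)) + 1 == r + 1: no slack at all */
	return (r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1;
}

static uint64_t avail_factor_new(uint64_t r)
{
	/* (r << 6) / 65: scales free space down by ~64/65 */
	return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
}

int main(void)
{
	uint64_t r = 1 << 20;	/* 1M free sectors */

	printf("old: %llu\n", (unsigned long long) avail_factor_old(r)); /* 1048577 */
	printf("new: %llu\n", (unsigned long long) avail_factor_new(r)); /* 1032444 */
	return 0;
}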
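The bch2_statfs() hunk makes a related correction: sectors are converted to filesystem blocks with a shift derived from the actual block size (s_blocksize_bits - 9) instead of PAGE_SECTOR_SHIFT, and the superblock/journal buckets are subtracted from f_blocks so hidden metadata never shows up as user-visible capacity. A rough worked example with invented numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned blocksize_bits = 12;		/* 4096-byte fs blocks */
	unsigned shift = blocksize_bits - 9;	/* 4096/512 = 8 sectors per block */
	uint64_t capacity = 1 << 21;		/* 1 GiB in 512-byte sectors */
	uint64_t hidden_metadata = 4096;	/* sb + journal buckets, in sectors */
	uint64_t used = 500000;			/* bch2_fs_sectors_used() result */

	/* f_blocks excludes the hidden metadata; f_bfree is capacity - used */
	printf("f_blocks = %llu\n",
	       (unsigned long long) ((capacity - hidden_metadata) >> shift));
	printf("f_bfree  = %llu\n",
	       (unsigned long long) ((capacity - used) >> shift));
	return 0;
}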