diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-10-30 14:32:47 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-11-12 19:16:47 -0500 |
commit | 72c1a51b24fa547612fc07a9fdb791d47deb89a3 (patch) | |
tree | b83caa52db6810ef568d65d424c359bd548686cf | |
parent | 424d1848cce0a3d567c3301107324267f4d46318 (diff) |
bcachefs: stripe support for replicas tracking
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 21 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/replicas.c | 205 | ||||
-rw-r--r-- | fs/bcachefs/replicas.h | 10 | ||||
-rw-r--r-- | fs/bcachefs/super-io.c | 36 | ||||
-rw-r--r-- | fs/bcachefs/super-io.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/sysfs.c | 4 |
7 files changed, 230 insertions, 50 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 7842c81639d4..56fef9e41eb5 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -883,10 +883,11 @@ struct bch_sb_field { x(journal, 0) \ x(members, 1) \ x(crypt, 2) \ - x(replicas, 3) \ + x(replicas_v0, 3) \ x(quota, 4) \ x(disk_groups, 5) \ - x(clean, 6) + x(clean, 6) \ + x(replicas, 7) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1012,16 +1013,28 @@ enum bch_data_type { BCH_DATA_NR = 6, }; +struct bch_replicas_entry_v0 { + __u8 data_type; + __u8 nr_devs; + __u8 devs[0]; +} __attribute__((packed)); + +struct bch_sb_field_replicas_v0 { + struct bch_sb_field field; + struct bch_replicas_entry_v0 entries[0]; +} __attribute__((packed, aligned(8))); + struct bch_replicas_entry { __u8 data_type; __u8 nr_devs; + __u8 nr_required; __u8 devs[0]; -}; +} __attribute__((packed)); struct bch_sb_field_replicas { struct bch_sb_field field; struct bch_replicas_entry entries[0]; -}; +} __attribute__((packed, aligned(8))); /* BCH_SB_FIELD_quota: */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index f530f2028deb..c5d9dc4ee85e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -129,7 +129,8 @@ int bch2_fs_recovery(struct bch_fs *c) int ret; mutex_lock(&c->sb_lock); - if (!bch2_sb_get_replicas(c->disk_sb.sb)) { + if (!rcu_dereference_protected(c->replicas, + lockdep_is_held(&c->sb_lock))->nr) { bch_info(c, "building replicas info"); set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); } diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 4da35d0932ae..687fa749febe 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -44,7 +44,10 @@ static void replicas_entry_to_text(struct printbuf *out, { unsigned i; - pr_buf(out, "%u: [", e->data_type); + pr_buf(out, "%s: %u/%u [", + bch2_data_types[e->data_type], + e->nr_required, + e->nr_devs); for (i = 0; i < e->nr_devs; i++) pr_buf(out, i ? " %u" : "%u", e->devs[i]); @@ -74,6 +77,8 @@ static void extent_to_replicas(struct bkey_s_c k, const union bch_extent_entry *entry; struct extent_ptr_decoded p; + r->nr_required = 1; + extent_for_each_ptr_decode(e, p, entry) if (!p.ptr.cached) r->devs[r->nr_devs++] = p.ptr.dev; @@ -114,6 +119,7 @@ static inline void devlist_to_replicas(struct bch_devs_list devs, e->data_type = data_type; e->nr_devs = 0; + e->nr_required = 1; for (i = 0; i < devs.nr; i++) e->devs[e->nr_devs++] = devs.devs[i]; @@ -358,14 +364,13 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r) { struct bch_replicas_entry *e, *dst; struct bch_replicas_cpu *cpu_r; - unsigned nr = 0, entry_size = 0; + unsigned nr = 0, entry_size = 0, idx = 0; - if (sb_r) - for_each_replicas_entry(sb_r, e) { - entry_size = max_t(unsigned, entry_size, - replicas_entry_bytes(e)); - nr++; - } + for_each_replicas_entry(sb_r, e) { + entry_size = max_t(unsigned, entry_size, + replicas_entry_bytes(e)); + nr++; + } cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) + nr * entry_size, GFP_NOIO); @@ -375,29 +380,71 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r) cpu_r->nr = nr; cpu_r->entry_size = entry_size; - nr = 0; + for_each_replicas_entry(sb_r, e) { + dst = cpu_replicas_entry(cpu_r, idx++); + memcpy(dst, e, replicas_entry_bytes(e)); + replicas_entry_sort(dst); + } - if (sb_r) - for_each_replicas_entry(sb_r, e) { - dst = cpu_replicas_entry(cpu_r, nr++); - memcpy(dst, e, replicas_entry_bytes(e)); - replicas_entry_sort(dst); - } + return cpu_r; +} + +static struct bch_replicas_cpu * +__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r) +{ + struct bch_replicas_entry_v0 *e; + struct bch_replicas_cpu *cpu_r; + unsigned nr = 0, entry_size = 0, idx = 0; + + for_each_replicas_entry(sb_r, e) { + entry_size = max_t(unsigned, entry_size, + replicas_entry_bytes(e)); + nr++; + } + + entry_size += sizeof(struct bch_replicas_entry) - + sizeof(struct bch_replicas_entry_v0); + + cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) + + nr * entry_size, GFP_NOIO); + if (!cpu_r) + return NULL; + + cpu_r->nr = nr; + cpu_r->entry_size = entry_size; + + for_each_replicas_entry(sb_r, e) { + struct bch_replicas_entry *dst = + cpu_replicas_entry(cpu_r, idx++); + + dst->data_type = e->data_type; + dst->nr_devs = e->nr_devs; + dst->nr_required = 1; + memcpy(dst->devs, e->devs, e->nr_devs); + replicas_entry_sort(dst); + } - bch2_cpu_replicas_sort(cpu_r); return cpu_r; } int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) { - struct bch_sb_field_replicas *sb_r; + struct bch_sb_field_replicas *sb_v1; + struct bch_sb_field_replicas_v0 *sb_v0; struct bch_replicas_cpu *cpu_r, *old_r; - sb_r = bch2_sb_get_replicas(c->disk_sb.sb); - cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r); + if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb))) + cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_v1); + else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb))) + cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0); + else + cpu_r = kzalloc(sizeof(struct bch_replicas_cpu), GFP_NOIO); + if (!cpu_r) return -ENOMEM; + bch2_cpu_replicas_sort(cpu_r); + old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock)); rcu_assign_pointer(c->replicas, cpu_r); if (old_r) @@ -406,23 +453,72 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) return 0; } +static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, + struct bch_replicas_cpu *r) +{ + struct bch_sb_field_replicas_v0 *sb_r; + struct bch_replicas_entry_v0 *dst; + struct bch_replicas_entry *src; + size_t bytes; + + bytes = sizeof(struct bch_sb_field_replicas); + + for_each_cpu_replicas_entry(r, src) + bytes += replicas_entry_bytes(src) - 1; + + sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb, + DIV_ROUND_UP(bytes, sizeof(u64))); + if (!sb_r) + return -ENOSPC; + + bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); + sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb); + + memset(&sb_r->entries, 0, + vstruct_end(&sb_r->field) - + (void *) &sb_r->entries); + + dst = sb_r->entries; + for_each_cpu_replicas_entry(r, src) { + dst->data_type = src->data_type; + dst->nr_devs = src->nr_devs; + memcpy(dst->devs, src->devs, src->nr_devs); + + dst = replicas_entry_next(dst); + + BUG_ON((void *) dst > vstruct_end(&sb_r->field)); + } + + return 0; +} + static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, struct bch_replicas_cpu *r) { struct bch_sb_field_replicas *sb_r; struct bch_replicas_entry *dst, *src; + bool need_v1 = false; size_t bytes; bytes = sizeof(struct bch_sb_field_replicas); - for_each_cpu_replicas_entry(r, src) + for_each_cpu_replicas_entry(r, src) { bytes += replicas_entry_bytes(src); + if (src->nr_required != 1) + need_v1 = true; + } + + if (!need_v1) + return bch2_cpu_replicas_to_sb_replicas_v0(c, r); sb_r = bch2_sb_resize_replicas(&c->disk_sb, DIV_ROUND_UP(bytes, sizeof(u64))); if (!sb_r) return -ENOSPC; + bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); + sb_r = bch2_sb_get_replicas(c->disk_sb.sb); + memset(&sb_r->entries, 0, vstruct_end(&sb_r->field) - (void *) &sb_r->entries); @@ -481,8 +577,10 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi if (!e->nr_devs) goto err; - err = "invalid replicas entry: too many devices"; - if (e->nr_devs >= BCH_REPLICAS_MAX) + err = "invalid replicas entry: bad nr_required"; + if (!e->nr_required || + (e->nr_required > 1 && + e->nr_required >= e->nr_devs)) goto err; err = "invalid replicas entry: invalid device"; @@ -524,6 +622,45 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = { .to_text = bch2_sb_replicas_to_text, }; +static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f) +{ + struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); + struct bch_sb_field_members *mi = bch2_sb_get_members(sb); + struct bch_replicas_cpu *cpu_r = NULL; + struct bch_replicas_entry_v0 *e; + const char *err; + unsigned i; + + for_each_replicas_entry_v0(sb_r, e) { + err = "invalid replicas entry: invalid data type"; + if (e->data_type >= BCH_DATA_NR) + goto err; + + err = "invalid replicas entry: no devices"; + if (!e->nr_devs) + goto err; + + err = "invalid replicas entry: invalid device"; + for (i = 0; i < e->nr_devs; i++) + if (!bch2_dev_exists(sb, mi, e->devs[i])) + goto err; + } + + err = "cannot allocate memory"; + cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r); + if (!cpu_r) + goto err; + + err = check_dup_replicas_entries(cpu_r); +err: + kfree(cpu_r); + return err; +} + +const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { + .validate = bch2_sb_validate_replicas_v0, +}; + /* Query replicas: */ bool bch2_replicas_marked(struct bch_fs *c, @@ -590,7 +727,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c, memset(&ret, 0, sizeof(ret)); for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) - ret.replicas[i].nr_online = UINT_MAX; + ret.replicas[i].redundancy = INT_MAX; mi = bch2_sb_get_members(c->disk_sb.sb); rcu_read_lock(); @@ -612,9 +749,9 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c, nr_offline++; } - ret.replicas[e->data_type].nr_online = - min(ret.replicas[e->data_type].nr_online, - nr_online); + ret.replicas[e->data_type].redundancy = + min(ret.replicas[e->data_type].redundancy, + (int) nr_online - (int) e->nr_required); ret.replicas[e->data_type].nr_offline = max(ret.replicas[e->data_type].nr_offline, @@ -623,6 +760,10 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c, rcu_read_unlock(); + for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) + if (ret.replicas[i].redundancy == INT_MAX) + ret.replicas[i].redundancy = 0; + return ret; } @@ -637,7 +778,7 @@ static bool have_enough_devs(struct replicas_status s, bool force_if_lost) { return (!s.replicas[type].nr_offline || force_if_degraded) && - (s.replicas[type].nr_online || force_if_lost); + (s.replicas[type].redundancy >= 0 || force_if_lost); } bool bch2_have_enough_devs(struct replicas_status s, unsigned flags) @@ -653,14 +794,14 @@ bool bch2_have_enough_devs(struct replicas_status s, unsigned flags) flags & BCH_FORCE_IF_DATA_LOST)); } -unsigned bch2_replicas_online(struct bch_fs *c, bool meta) +int bch2_replicas_online(struct bch_fs *c, bool meta) { struct replicas_status s = bch2_replicas_status(c); - return meta - ? min(s.replicas[BCH_DATA_JOURNAL].nr_online, - s.replicas[BCH_DATA_BTREE].nr_online) - : s.replicas[BCH_DATA_USER].nr_online; + return (meta + ? min(s.replicas[BCH_DATA_JOURNAL].redundancy, + s.replicas[BCH_DATA_BTREE].redundancy) + : s.replicas[BCH_DATA_USER].redundancy) + 1; } unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index 7deca37c30d6..0d99d7d9efd5 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -16,7 +16,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); struct replicas_status { struct { - unsigned nr_online; + int redundancy; unsigned nr_offline; } replicas[BCH_DATA_NR]; }; @@ -26,7 +26,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *, struct replicas_status bch2_replicas_status(struct bch_fs *); bool bch2_have_enough_devs(struct replicas_status, unsigned); -unsigned bch2_replicas_online(struct bch_fs *, bool); +int bch2_replicas_online(struct bch_fs *, bool); unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); int bch2_replicas_gc_end(struct bch_fs *, int); @@ -45,8 +45,14 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned); (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ (_i) = replicas_entry_next(_i)) +#define for_each_replicas_entry_v0(_r, _i) \ + for (_i = (_r)->entries; \ + (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ + (_i) = replicas_entry_next(_i)) + int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *); extern const struct bch_sb_field_ops bch_sb_field_ops_replicas; +extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0; #endif /* _BCACHEFS_REPLICAS_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index a8d75c6add82..83523572881a 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -56,8 +56,13 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, void *src, *dst; src = vstruct_end(f); - f->u64s = cpu_to_le32(u64s); - dst = vstruct_end(f); + + if (u64s) { + f->u64s = cpu_to_le32(u64s); + dst = vstruct_end(f); + } else { + dst = f; + } memmove(dst, src, vstruct_end(sb->sb) - src); @@ -67,7 +72,16 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, sb->sb->u64s = cpu_to_le32(sb_u64s); - return f; + return u64s ? f : NULL; +} + +void bch2_sb_field_delete(struct bch_sb_handle *sb, + enum bch_sb_field_type type) +{ + struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); + + if (f) + __bch2_sb_field_resize(sb, f, 0); } /* Superblock realloc/free: */ @@ -167,7 +181,8 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb, } f = __bch2_sb_field_resize(sb, f, u64s); - f->type = cpu_to_le32(type); + if (f) + f->type = cpu_to_le32(type); return f; } @@ -358,6 +373,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) { struct bch_sb_field *src_f, *dst_f; struct bch_sb *dst = dst_handle->sb; + unsigned i; dst->version = src->version; dst->seq = src->seq; @@ -376,15 +392,17 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) memcpy(dst->features, src->features, sizeof(dst->features)); memcpy(dst->compat, src->compat, sizeof(dst->compat)); - vstruct_for_each(src, src_f) { - if (src_f->type == BCH_SB_FIELD_journal) + for (i = 0; i < BCH_SB_FIELD_NR; i++) { + if (i == BCH_SB_FIELD_journal) continue; - dst_f = bch2_sb_field_get(dst, le32_to_cpu(src_f->type)); + src_f = bch2_sb_field_get(src, i); + dst_f = bch2_sb_field_get(dst, i); dst_f = __bch2_sb_field_resize(dst_handle, dst_f, - le32_to_cpu(src_f->u64s)); + src_f ? le32_to_cpu(src_f->u64s) : 0); - memcpy(dst_f, src_f, vstruct_bytes(src_f)); + if (src_f) + memcpy(dst_f, src_f, vstruct_bytes(src_f)); } } diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 6eb193ac7074..c66fd9746a76 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -11,6 +11,7 @@ struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type); struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *, enum bch_sb_field_type, unsigned); +void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type); #define field_to_type(_f, _name) \ container_of_or_null(_f, struct bch_sb_field_##_name, field) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index f90b8a43a82c..f793cfbad605 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -345,8 +345,8 @@ SHOW(bch2_fs) sysfs_print(promote_whole_extents, c->promote_whole_extents); - sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true)); - sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false)); + sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true)); + sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false)); /* Debugging: */ |