summary refs log tree commit diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2024-02-09 21:30:46 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2024-02-09 21:32:46 -0500
commit 7a716b76b5963dc2d158883f4497bab221932412 (patch)
tree e326c5280a0c43b080f520a3bd19ba9a77b556be
parent 9e6d9560d070bc14e1498a24ef6634d2b99f7e84 (diff)
Update bcachefs sources to bee7b5a4fa21 bcachefs: Pin btree cache in ram for random access in fsck
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- include/linux/sort.h 6
-rw-r--r-- libbcachefs/backpointers.c 140
-rw-r--r-- libbcachefs/bbpos_types.h 2
-rw-r--r-- libbcachefs/bcachefs.h 1
-rw-r--r-- libbcachefs/bcachefs_format.h 8
-rw-r--r-- libbcachefs/bkey_methods.h 3
-rw-r--r-- libbcachefs/btree_cache.c 19
-rw-r--r-- libbcachefs/btree_gc.c 4
-rw-r--r-- libbcachefs/btree_io.c 7
-rw-r--r-- libbcachefs/btree_iter.c 6
-rw-r--r-- libbcachefs/btree_types.h 9
-rw-r--r-- libbcachefs/btree_update.c 201
-rw-r--r-- libbcachefs/btree_update.h 3
-rw-r--r-- libbcachefs/btree_update_interior.c 3
-rw-r--r-- libbcachefs/buckets.c 3
-rw-r--r-- libbcachefs/dirent.c 10
-rw-r--r-- libbcachefs/dirent.h 2
-rw-r--r-- libbcachefs/errcode.h 8
-rw-r--r-- libbcachefs/error.c 10
-rw-r--r-- libbcachefs/error.h 2
-rw-r--r-- libbcachefs/fs-common.c 16
-rw-r--r-- libbcachefs/fs.c 7
-rw-r--r-- libbcachefs/fsck.c 538
-rw-r--r-- libbcachefs/inode.c 5
-rw-r--r-- libbcachefs/inode.h 14
-rw-r--r-- libbcachefs/lru.c 4
-rw-r--r-- libbcachefs/opts.h 5
-rw-r--r-- libbcachefs/recovery.c 2
-rw-r--r-- libbcachefs/recovery_types.h 1
-rw-r--r-- libbcachefs/sb-downgrade.c 8
-rw-r--r-- libbcachefs/sb-errors_types.h 13
-rw-r--r-- libbcachefs/subvolume.c 156
-rw-r--r-- libbcachefs/subvolume.h 8
-rw-r--r-- libbcachefs/subvolume_format.h 4
-rw-r--r-- linux/mean_and_variance.c 5
36 files changed, 859 insertions, 376 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index d3c500bc..1aa1789b 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-50847e296b34efabe199e408ec4d72f10a866c39
+bee7b5a4fa2135c9ec9d1c9424018ee494500bb5
diff --git a/include/linux/sort.h b/include/linux/sort.h
index afea0445..17c6ba34 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -2,6 +2,12 @@
#define _LINUX_SORT_H
#include <stdlib.h>
+#include <linux/types.h>
+
+void sort_r(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv);
static inline void sort(void *base, size_t num, size_t size,
int (*cmp_func)(const void *, const void *),
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index b4dc319b..23fe9378 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -129,8 +129,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
printbuf_exit(&buf);
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
- bch2_inconsistent_error(c);
- return -EIO;
+ return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
} else {
return 0;
}
@@ -553,60 +552,61 @@ static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
};
}
-static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+static u64 mem_may_pin_bytes(struct bch_fs *c)
{
struct sysinfo i;
- u64 mem_bytes;
-
si_meminfo(&i);
- mem_bytes = i.totalram * i.mem_unit;
- return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
+
+ u64 mem_bytes = i.totalram * i.mem_unit;
+ return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
+}
+
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+ return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
- unsigned btree_leaf_mask,
- unsigned btree_interior_mask,
+ u64 btree_leaf_mask,
+ u64 btree_interior_mask,
struct bbpos start, struct bbpos *end)
{
- struct btree_iter iter;
- struct bkey_s_c k;
- size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
- enum btree_id btree;
+ struct bch_fs *c = trans->c;
+ s64 mem_may_pin = mem_may_pin_bytes(c);
int ret = 0;
- for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
- unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+ btree_interior_mask |= btree_leaf_mask;
+
+ c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
+ c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
+ c->btree_cache.pinned_nodes_start = start;
+ c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
+
+ for (enum btree_id btree = start.btree;
+ btree < BTREE_ID_NR && !ret;
+ btree++) {
+ unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
+ struct btree_iter iter;
+ struct btree *b;
if (!((1U << btree) & btree_leaf_mask) &&
!((1U << btree) & btree_interior_mask))
continue;
- bch2_trans_node_iter_init(trans, &iter, btree,
- btree == start.btree ? start.pos : POS_MIN,
- 0, depth, 0);
- /*
- * for_each_btree_key_contineu() doesn't check the return value
- * from bch2_btree_iter_advance(), which is needed when
- * iterating over interior nodes where we'll see keys at
- * SPOS_MAX:
- */
- do {
- k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
- ret = bkey_err(k);
- if (!k.k || ret)
- break;
-
- --btree_nodes;
- if (!btree_nodes) {
- *end = BBPOS(btree, k.k->p);
+ __for_each_btree_node(trans, iter, btree,
+ btree == start.btree ? start.pos : POS_MIN,
+ 0, depth, BTREE_ITER_PREFETCH, b, ret) {
+ mem_may_pin -= btree_buf_bytes(b);
+ if (mem_may_pin <= 0) {
+ c->btree_cache.pinned_nodes_end = *end =
+ BBPOS(btree, b->key.k.p);
bch2_trans_iter_exit(trans, &iter);
return 0;
}
- } while (bch2_btree_iter_advance(&iter));
+ }
bch2_trans_iter_exit(trans, &iter);
}
- *end = BBPOS_MAX;
return ret;
}
@@ -664,62 +664,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
return 0;
}
-static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
- struct bpos bucket)
-{
- return bch2_dev_exists2(c, bucket.inode)
- ? bucket_pos_to_bp(c, bucket, 0)
- : bucket;
-}
-
-static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
- struct bpos start, struct bpos *end)
-{
- struct btree_iter alloc_iter;
- struct btree_iter bp_iter;
- struct bkey_s_c alloc_k, bp_k;
- size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
- bool alloc_end = false, bp_end = false;
- int ret = 0;
-
- bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
- start, 0, 1, 0);
- bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
- bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
- while (1) {
- alloc_k = !alloc_end
- ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
- : bkey_s_c_null;
- bp_k = !bp_end
- ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
- : bkey_s_c_null;
-
- ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
- if ((!alloc_k.k && !bp_k.k) || ret) {
- *end = SPOS_MAX;
- break;
- }
-
- --btree_nodes;
- if (!btree_nodes) {
- *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
- break;
- }
-
- if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
- bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
- if (!bch2_btree_iter_advance(&alloc_iter))
- alloc_end = true;
- } else {
- if (!bch2_btree_iter_advance(&bp_iter))
- bp_end = true;
- }
- }
- bch2_trans_iter_exit(trans, &bp_iter);
- bch2_trans_iter_exit(trans, &alloc_iter);
- return ret;
-}
-
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
@@ -730,10 +674,16 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bkey_init(&s.last_flushed.k->k);
while (1) {
- ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end);
+ struct bbpos end;
+ ret = bch2_get_btree_in_memory_pos(trans,
+ BIT_ULL(BTREE_ID_backpointers),
+ BIT_ULL(BTREE_ID_backpointers),
+ BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
if (ret)
break;
+ s.bucket_end = end.pos;
+
if ( bpos_eq(s.bucket_start, POS_MIN) &&
!bpos_eq(s.bucket_end, SPOS_MAX))
bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@@ -761,6 +711,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bch2_trans_put(trans);
bch2_bkey_buf_exit(&s.last_flushed, c);
+ c->btree_cache.pinned_nodes_leaf_mask = 0;
+ c->btree_cache.pinned_nodes_interior_mask = 0;
+
bch_err_fn(c, ret);
return ret;
}
@@ -866,6 +819,9 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
}
bch2_trans_put(trans);
+ c->btree_cache.pinned_nodes_leaf_mask = 0;
+ c->btree_cache.pinned_nodes_interior_mask = 0;
+
bch_err_fn(c, ret);
return ret;
}
diff --git a/libbcachefs/bbpos_types.h b/libbcachefs/bbpos_types.h
index 5198e94c..f6389334 100644
--- a/libbcachefs/bbpos_types.h
+++ b/libbcachefs/bbpos_types.h
@@ -13,6 +13,6 @@ static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
}
#define BBPOS_MIN BBPOS(0, POS_MIN)
-#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX)
+#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, SPOS_MAX)
#endif /* _BCACHEFS_BBPOS_TYPES_H */
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 70369495..b53b321b 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -505,6 +505,7 @@ enum gc_phase {
GC_PHASE_BTREE_deleted_inodes,
GC_PHASE_BTREE_logged_ops,
GC_PHASE_BTREE_rebalance_work,
+ GC_PHASE_BTREE_subvolume_children,
GC_PHASE_PENDING_DELETE,
};
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 14f61361..1bb24aa7 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -840,7 +840,9 @@ struct bch_sb_field_downgrade {
x(snapshot_skiplists, BCH_VERSION(1, 1)) \
x(deleted_inodes, BCH_VERSION(1, 2)) \
x(rebalance_work, BCH_VERSION(1, 3)) \
- x(member_seq, BCH_VERSION(1, 4))
+ x(member_seq, BCH_VERSION(1, 4)) \
+ x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
+ x(btree_subvolume_children, BCH_VERSION(1, 6))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1488,7 +1490,9 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
BIT_ULL(KEY_TYPE_logged_op_finsert)) \
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
- BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
+ BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
+ x(subvolume_children, 19, 0, \
+ BIT_ULL(KEY_TYPE_set))
enum btree_id {
#define x(name, nr, ...) BTREE_ID_##name = nr,
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index 03efe8ee..f8217b24 100644
--- a/libbcachefs/bkey_methods.h
+++ b/libbcachefs/bkey_methods.h
@@ -78,6 +78,7 @@ bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
enum btree_update_flags {
__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
+ __BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE,
__BTREE_UPDATE_NOJOURNAL,
__BTREE_UPDATE_KEY_CACHE_RECLAIM,
@@ -91,6 +92,8 @@ enum btree_update_flags {
};
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
+#define BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE \
+ (1U << __BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE)
#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 9b7ea122..79975046 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bbpos.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
@@ -208,6 +209,18 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
int ret = 0;
lockdep_assert_held(&bc->lock);
+
+ struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
+
+ u64 mask = b->c.level
+ ? bc->pinned_nodes_interior_mask
+ : bc->pinned_nodes_leaf_mask;
+
+ if ((mask & BIT_ULL(b->c.btree_id)) &&
+ bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
+ bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+
wait_on_io:
if (b->flags & ((1U << BTREE_NODE_dirty)|
(1U << BTREE_NODE_read_in_flight)|
@@ -905,7 +918,7 @@ retry:
if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-BCH_ERR_btree_node_read_error);
}
EBUG_ON(b->c.btree_id != path->btree_id);
@@ -996,7 +1009,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-BCH_ERR_btree_node_read_error);
}
EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1079,7 +1092,7 @@ lock_node:
if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
- b = ERR_PTR(-EIO);
+ b = ERR_PTR(-BCH_ERR_btree_node_read_error);
goto out;
}
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index eb92526b..6c52f116 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -407,7 +407,7 @@ again:
printbuf_reset(&buf);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
- if (mustfix_fsck_err_on(ret == -EIO, c,
+ if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), c,
btree_node_unreadable,
"Topology repair: unreadable btree node at btree %s level %u:\n"
" %s",
@@ -979,7 +979,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
false);
ret = PTR_ERR_OR_ZERO(child);
- if (ret == -EIO) {
+ if (bch2_err_matches(ret, EIO)) {
bch2_topology_error(c);
if (__fsck_err(c,
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 61b60938..86415701 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -581,8 +581,7 @@ static int __btree_err(int ret,
break;
case -BCH_ERR_btree_node_read_err_bad_node:
bch2_print_string_as_lines(KERN_ERR, out.buf);
- bch2_topology_error(c);
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
+ ret = bch2_topology_error(c);
break;
case -BCH_ERR_btree_node_read_err_incompatible:
bch2_print_string_as_lines(KERN_ERR, out.buf);
@@ -1737,7 +1736,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);
- ret = -EIO;
+ ret = -BCH_ERR_btree_node_read_error;
goto err;
}
@@ -1841,7 +1840,7 @@ static void btree_node_write_work(struct work_struct *work)
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
- ret = -BCH_ERR_btree_write_all_failed;
+ ret = -BCH_ERR_btree_node_write_all_failed;
goto err;
}
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 3aac6ed5..07b1de5c 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2303,7 +2303,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
btree_iter_path(trans, iter)->level);
if (iter->flags & BTREE_ITER_WITH_JOURNAL)
- return bkey_s_c_err(-EIO);
+ return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
@@ -2501,6 +2501,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
k = bch2_btree_iter_peek_upto(&iter2, end);
if (k.k && !bkey_err(k)) {
+ swap(iter->key_cache_path, iter2.key_cache_path);
iter->k = iter2.k;
k.k = &iter->k;
}
@@ -2760,6 +2761,9 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
struct btree_trans *trans = src->trans;
*dst = *src;
+#ifdef TRACK_PATH_ALLOCATED
+ dst->ip_allocated = _RET_IP_;
+#endif
if (src->path)
__btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
if (src->update_path)
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 0d5eecbd..b2ebf143 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -6,6 +6,7 @@
#include <linux/list.h>
#include <linux/rhashtable.h>
+#include "bbpos_types.h"
#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "errcode.h"
@@ -173,6 +174,11 @@ struct btree_cache {
*/
struct task_struct *alloc_lock;
struct closure_waitlist alloc_wait;
+
+ struct bbpos pinned_nodes_start;
+ struct bbpos pinned_nodes_end;
+ u64 pinned_nodes_leaf_mask;
+ u64 pinned_nodes_interior_mask;
};
struct btree_node_iter {
@@ -654,6 +660,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
BIT_ULL(BKEY_TYPE_inodes)| \
BIT_ULL(BKEY_TYPE_stripes)| \
BIT_ULL(BKEY_TYPE_reflink)| \
+ BIT_ULL(BKEY_TYPE_subvolumes)| \
BIT_ULL(BKEY_TYPE_btree))
#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \
@@ -727,7 +734,7 @@ struct btree_root {
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level;
u8 alive;
- s8 error;
+ s16 error;
};
enum btree_gc_coalesce_fail_reason {
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index e5193116..d3d625d4 100644
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -82,40 +82,169 @@ static noinline int extent_back_merge(struct btree_trans *trans,
return 0;
}
+static struct bkey_s_c peek_slot_including_whiteouts(struct btree_trans *trans, struct btree_iter *iter,
+ enum btree_id btree, struct bpos pos)
+{
+ struct bkey_s_c k;
+ int ret;
+
+ for_each_btree_key_norestart(trans, *iter, btree, pos,
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_NOPRESERVE, k, ret) {
+ if (!bkey_eq(k.k->p, pos))
+ break;
+ if (bch2_snapshot_is_ancestor(trans->c, pos.snapshot, k.k->p.snapshot))
+ return k;
+ }
+ bch2_trans_iter_exit(trans, iter);
+
+ return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
+}
+
/*
* When deleting, check if we need to emit a whiteout (because we're overwriting
* something in an ancestor snapshot)
*/
-static int need_whiteout_for_snapshot(struct btree_trans *trans,
- enum btree_id btree_id, struct bpos pos)
+static int need_whiteout_for_snapshot(struct btree_trans *trans, enum btree_id btree, struct bpos pos)
+{
+ pos.snapshot = bch2_snapshot_parent(trans->c, pos.snapshot);
+ if (!pos.snapshot)
+ return 0;
+
+ struct btree_iter iter;
+ struct bkey_s_c k = peek_slot_including_whiteouts(trans, &iter, btree, pos);
+ int ret = bkey_err(k) ?: k.k && !bkey_whiteout(k.k);
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+/*
+ * We're overwriting a key at @pos in snapshot @snapshot, so we need to insert a
+ * whiteout: that might be in @snapshot, or if there are overwrites in sibling
+ * snapshots, find the common ancestor where @pos is overwritten in every
+ * descendent and insert the whiteout there - which might be at @pos.
+ */
+static int delete_interior_snapshot_key(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bpos whiteout, bool deleting,
+ struct bpos overwrite, bool old_is_whiteout)
{
+ struct bch_fs *c = trans->c;
+ struct bpos orig_whiteout = whiteout, sib = whiteout;
struct btree_iter iter;
struct bkey_s_c k;
- u32 snapshot = pos.snapshot;
int ret;
- if (!bch2_snapshot_parent(trans->c, pos.snapshot))
- return 0;
+ sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
- pos.snapshot++;
+ for_each_btree_key_norestart(trans, iter, btree, sib,
+ BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_INTENT, k, ret) {
+ BUG_ON(bpos_gt(k.k->p, overwrite));
- for_each_btree_key_norestart(trans, iter, btree_id, pos,
- BTREE_ITER_ALL_SNAPSHOTS|
- BTREE_ITER_NOPRESERVE, k, ret) {
- if (!bkey_eq(k.k->p, pos))
+ if (bpos_lt(k.k->p, sib)) /* unrelated branch - skip */
+ continue;
+ if (bpos_gt(k.k->p, sib)) /* did not find @sib */
break;
- if (bch2_snapshot_is_ancestor(trans->c, snapshot,
- k.k->p.snapshot)) {
- ret = !bkey_whiteout(k.k);
+ /* @overwrite is also written in @sib, now check parent */
+ whiteout.snapshot = bch2_snapshot_parent(c, whiteout.snapshot);
+ if (bpos_eq(whiteout, overwrite))
break;
- }
+
+ sib = whiteout;
+ sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
}
- bch2_trans_iter_exit(trans, &iter);
+ if (ret)
+ goto err;
+
+ if (!deleting && bpos_eq(whiteout, orig_whiteout))
+ goto out;
+
+ if (!bpos_eq(iter.pos, whiteout)) {
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_iter_init(trans, &iter, btree, whiteout, BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+ }
+
+ iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
+ iter.flags |= BTREE_ITER_FILTER_SNAPSHOTS;
+
+ struct bkey_i *delete = bch2_trans_kmalloc(trans, sizeof(*delete));
+ ret = PTR_ERR_OR_ZERO(delete);
+ if (ret)
+ goto err;
+
+ bkey_init(&delete->k);
+ delete->k.p = whiteout;
+
+ ret = !bpos_eq(whiteout, overwrite)
+ ? !old_is_whiteout
+ : need_whiteout_for_snapshot(trans, btree, whiteout);
+ if (ret < 0)
+ goto err;
+ if (ret)
+ delete->k.type = KEY_TYPE_whiteout;
+
+ ret = bch2_trans_update(trans, &iter, delete,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE);
+out:
+err:
+ bch2_trans_iter_exit(trans, &iter);
return ret;
}
+/*
+ * We're overwriting a key in a snapshot that has ancestors: if we're
+ * overwriting a key in a different snapshot, we need to check if it is now
+ * fully overwritten and can be deleted, and if we're deleting a key in the
+ * current snapshot we need to check if we need to leave a whiteout.
+ */
+static noinline int
+overwrite_interior_snapshot_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *k)
+{
+ struct bkey_s_c old = bch2_btree_iter_peek_slot(iter);
+
+ int ret = bkey_err(old);
+ if (ret)
+ return ret;
+
+ if (!bkey_deleted(old.k)) {
+ if (old.k->p.snapshot != k->k.p.snapshot) {
+ /*
+ * We're overwriting a key in a different snapshot:
+ * check if it's also been overwritten in siblings
+ */
+ ret = delete_interior_snapshot_key(trans, iter->btree_id,
+ k->k.p, bkey_deleted(&k->k),
+ old.k->p, bkey_whiteout(old.k));
+ if (ret)
+ return ret;
+ if (bkey_deleted(&k->k))
+ return 1;
+ } else if (bkey_deleted(&k->k)) {
+ /*
+ * We're deleting a key in the current snapshot:
+ * check if we need to leave a whiteout
+ */
+ ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
+ if (unlikely(ret < 0))
+ return ret;
+ if (ret)
+ k->k.type = KEY_TYPE_whiteout;
+ }
+ }
+
+ return 0;
+}
+
int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
enum btree_id id,
struct bpos old_pos,
@@ -503,32 +632,29 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_update_flags flags)
{
- btree_path_idx_t path_idx = iter->update_path ?: iter->path;
- int ret;
-
if (iter->flags & BTREE_ITER_IS_EXTENTS)
return bch2_trans_update_extent(trans, iter, k, flags);
- if (bkey_deleted(&k->k) &&
- !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
- (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
- ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
- if (unlikely(ret < 0))
- return ret;
-
+ if (!(flags & (BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE|
+ BTREE_UPDATE_KEY_CACHE_RECLAIM)) &&
+ (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
+ bch2_snapshot_parent(trans->c, k->k.p.snapshot)) {
+ int ret = overwrite_interior_snapshot_key(trans, iter, k);
if (ret)
- k->k.type = KEY_TYPE_whiteout;
+ return ret < 0 ? ret : 0;
}
/*
* Ensure that updates to cached btrees go to the key cache:
*/
+ btree_path_idx_t path_idx = iter->update_path ?: iter->path;
struct btree_path *path = trans->paths + path_idx;
+
if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
!path->cached &&
!path->level &&
btree_id_cached(trans->c, path->btree_id)) {
- ret = bch2_trans_update_get_key_cache(trans, iter, path);
+ int ret = bch2_trans_update_get_key_cache(trans, iter, path);
if (ret)
return ret;
@@ -790,6 +916,27 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
struct bpos pos, bool set)
{
+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
+ int ret = PTR_ERR_OR_ZERO(k);
+ if (ret)
+ return ret;
+
+ bkey_init(&k->k);
+ k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+ k->k.p = pos;
+
+ struct btree_iter iter;
+ bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
+
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, 0);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
+ struct bpos pos, bool set)
+{
struct bkey_i k;
bkey_init(&k.k);
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index b9382b7b..cc7c53e8 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -63,11 +63,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
struct bpos, struct bpos, unsigned, u64 *);
int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
+int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);
static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
enum btree_id btree, struct bpos pos)
{
- return bch2_btree_bit_mod(trans, btree, pos, false);
+ return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
}
int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 030291cc..5fbea33f 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -1844,8 +1844,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
__func__, buf1.buf, buf2.buf);
printbuf_exit(&buf1);
printbuf_exit(&buf2);
- bch2_topology_error(c);
- ret = -EIO;
+ ret = bch2_topology_error(c);
goto err;
}
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 7dca10ba..c2f46b26 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -1053,7 +1053,8 @@ int bch2_trigger_extent(struct btree_trans *trans,
(int) bch2_bkey_needs_rebalance(c, old);
if (mod) {
- int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0);
+ int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
+ new.k->p, mod > 0);
if (ret)
return ret;
}
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 97773cff..b5ee11b5 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -201,17 +201,17 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
}
int bch2_dirent_create_snapshot(struct btree_trans *trans,
- u64 dir, u32 snapshot,
+ u32 dir_subvol, u64 dir, u32 snapshot,
const struct bch_hash_info *hash_info,
u8 type, const struct qstr *name, u64 dst_inum,
u64 *dir_offset,
bch_str_hash_flags_t str_hash_flags)
{
- subvol_inum zero_inum = { 0 };
+ subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
struct bkey_i_dirent *dirent;
int ret;
- dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum);
+ dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
@@ -220,7 +220,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
dirent->k.p.snapshot = snapshot;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
- zero_inum, snapshot,
+ dir_inum, snapshot,
&dirent->k_i, str_hash_flags,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
*dir_offset = dirent->k.p.offset;
@@ -522,7 +522,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
SPOS(dir, 0, snapshot),
POS(dir, U64_MAX), 0, k, ret)
if (k.k->type == KEY_TYPE_dirent) {
- ret = -ENOTEMPTY;
+ ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
break;
}
bch2_trans_iter_exit(trans, &iter);
diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h
index f1dd7208..34cb8e18 100644
--- a/libbcachefs/dirent.h
+++ b/libbcachefs/dirent.h
@@ -35,7 +35,7 @@ static inline unsigned dirent_val_u64s(unsigned len)
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
struct bkey_s_c_dirent, subvol_inum *);
-int bch2_dirent_create_snapshot(struct btree_trans *, u64, u32,
+int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *,
bch_str_hash_flags_t);
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 3fd33b30..fe3fc14d 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -109,6 +109,8 @@
x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
x(ENOENT, ENOENT_dev_not_found) \
x(ENOENT, ENOENT_dev_idx_not_found) \
+ x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
+ x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
x(0, open_buckets_empty) \
x(0, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \
@@ -178,6 +180,7 @@
x(EINVAL, opt_parse_error) \
x(EINVAL, remove_with_metadata_missing_unimplemented)\
x(EINVAL, remove_would_lose_data) \
+ x(EINVAL, btree_iter_with_journal_not_supported) \
x(EROFS, erofs_trans_commit) \
x(EROFS, erofs_no_writes) \
x(EROFS, erofs_journal_err) \
@@ -227,7 +230,10 @@
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \
x(EIO, sb_not_downgraded) \
- x(EIO, btree_write_all_failed) \
+ x(EIO, btree_node_write_all_failed) \
+ x(EIO, btree_node_read_error) \
+ x(EIO, btree_node_read_validate_error) \
+ x(EIO, btree_need_topology_repair) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 70a12539..8ae95b21 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
+#include "recovery.h"
#include "super.h"
#include <linux/thread_with_file.h>
@@ -25,11 +26,16 @@ bool bch2_inconsistent_error(struct bch_fs *c)
}
}
-void bch2_topology_error(struct bch_fs *c)
+int bch2_topology_error(struct bch_fs *c)
{
set_bit(BCH_FS_topology_error, &c->flags);
- if (!test_bit(BCH_FS_fsck_running, &c->flags))
+ if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
bch2_inconsistent_error(c);
+ return -BCH_ERR_btree_need_topology_repair;
+ } else {
+ return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
+ -BCH_ERR_btree_node_read_validate_error;
+ }
}
void bch2_fatal_error(struct bch_fs *c)
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
index fec17d13..94491190 100644
--- a/libbcachefs/error.h
+++ b/libbcachefs/error.h
@@ -30,7 +30,7 @@ struct work_struct;
bool bch2_inconsistent_error(struct bch_fs *);
-void bch2_topology_error(struct bch_fs *);
+int bch2_topology_error(struct bch_fs *);
#define bch2_fs_inconsistent(c, ...) \
({ \
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
index 523507e3..3d43c036 100644
--- a/libbcachefs/fs-common.c
+++ b/libbcachefs/fs-common.c
@@ -107,6 +107,7 @@ int bch2_create_trans(struct btree_trans *trans,
u32 new_subvol, dir_snapshot;
ret = bch2_subvolume_create(trans, new_inode->bi_inum,
+ dir.subvol,
snapshot_src.subvol,
&new_subvol, &snapshot,
(flags & BCH_CREATE_SNAPSHOT_RO) != 0);
@@ -242,7 +243,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
struct bch_inode_unpacked *dir_u,
struct bch_inode_unpacked *inode_u,
const struct qstr *name,
- bool deleting_snapshot)
+ bool deleting_subvol)
{
struct bch_fs *c = trans->c;
struct btree_iter dir_iter = { NULL };
@@ -270,18 +271,25 @@ int bch2_unlink_trans(struct btree_trans *trans,
if (ret)
goto err;
- if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) {
+ if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
ret = bch2_empty_dir_trans(trans, inum);
if (ret)
goto err;
}
- if (deleting_snapshot && !inode_u->bi_subvol) {
+ if (deleting_subvol && !inode_u->bi_subvol) {
ret = -BCH_ERR_ENOENT_not_subvol;
goto err;
}
- if (deleting_snapshot || inode_u->bi_subvol) {
+ if (inode_u->bi_subvol) {
+ /* Recursive subvolume destroy not allowed (yet?) */
+ ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
+ if (ret)
+ goto err;
+ }
+
+ if (deleting_subvol || inode_u->bi_subvol) {
ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
if (ret)
goto err;
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 77ea6109..4445fa2f 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -503,7 +503,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
__bch2_link(c, inode, dir, dentry);
if (unlikely(ret))
- return ret;
+ return bch2_err_class(ret);
ihold(&inode->v);
d_instantiate(dentry, &inode->v);
@@ -555,8 +555,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
struct bch_inode_info *dir= to_bch_ei(vdir);
struct bch_fs *c = dir->v.i_sb->s_fs_info;
- return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+ int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
__bch2_unlink(vdir, dentry, false);
+ return bch2_err_class(ret);
}
static int bch2_symlink(struct mnt_idmap *idmap,
@@ -591,7 +592,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
return 0;
err:
iput(&inode->v);
- return ret;
+ return bch2_err_class(ret);
}
static int bch2_mkdir(struct mnt_idmap *idmap,
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index e4a8a14c..dfd54708 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -252,7 +252,7 @@ create_lostfound:
goto err;
ret = bch2_dirent_create_snapshot(trans,
- root_inode.bi_inum, snapshot, &root_hash_info,
+ 0, root_inode.bi_inum, snapshot, &root_hash_info,
mode_to_type(lostfound->bi_mode),
&lostfound_str,
lostfound->bi_inum,
@@ -275,9 +275,24 @@ static int reattach_inode(struct btree_trans *trans,
char name_buf[20];
struct qstr name;
u64 dir_offset = 0;
+ u32 dirent_snapshot = inode_snapshot;
int ret;
- ret = lookup_lostfound(trans, inode_snapshot, &lostfound);
+ if (inode->bi_subvol) {
+ inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
+
+ u64 root_inum;
+ ret = subvol_lookup(trans, inode->bi_parent_subvol,
+ &dirent_snapshot, &root_inum);
+ if (ret)
+ return ret;
+
+ snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
+ } else {
+ snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
+ }
+
+ ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
if (ret)
return ret;
@@ -291,14 +306,16 @@ static int reattach_inode(struct btree_trans *trans,
dir_hash = bch2_hash_info_init(trans->c, &lostfound);
- snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
name = (struct qstr) QSTR(name_buf);
ret = bch2_dirent_create_snapshot(trans,
- lostfound.bi_inum, inode_snapshot,
+ inode->bi_parent_subvol, lostfound.bi_inum,
+ dirent_snapshot,
&dir_hash,
inode_d_type(inode),
- &name, inode->bi_inum, &dir_offset,
+ &name,
+ inode->bi_subvol ?: inode->bi_inum,
+ &dir_offset,
BCH_HASH_SET_MUST_CREATE);
if (ret)
return ret;
@@ -564,13 +581,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
}
static struct inode_walker_entry *
-lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
- u32 snapshot, bool is_whiteout)
+lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
{
- struct inode_walker_entry *i;
-
- snapshot = bch2_snapshot_equiv(c, snapshot);
+ bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
+ u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
+ struct inode_walker_entry *i;
__darray_for_each(w->inodes, i)
if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
goto found;
@@ -581,20 +597,24 @@ found:
if (snapshot != i->snapshot && !is_whiteout) {
struct inode_walker_entry new = *i;
- size_t pos;
- int ret;
new.snapshot = snapshot;
new.count = 0;
- bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
- w->last_pos.inode, snapshot, i->snapshot);
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
+ "unexpected because we should always update the inode when we update a key in that inode\n"
+ "%s",
+ w->last_pos.inode, snapshot, i->snapshot, buf.buf);
+ printbuf_exit(&buf);
while (i > w->inodes.data && i[-1].snapshot > snapshot)
--i;
- pos = i - w->inodes.data;
- ret = darray_insert_item(&w->inodes, pos, new);
+ size_t pos = i - w->inodes.data;
+ int ret = darray_insert_item(&w->inodes, pos, new);
if (ret)
return ERR_PTR(ret);
@@ -605,21 +625,21 @@ found:
}
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
- struct inode_walker *w, struct bpos pos,
- bool is_whiteout)
+ struct inode_walker *w,
+ struct bkey_s_c k)
{
- if (w->last_pos.inode != pos.inode) {
- int ret = get_inodes_all_snapshots(trans, w, pos.inode);
+ if (w->last_pos.inode != k.k->p.inode) {
+ int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
if (ret)
return ERR_PTR(ret);
- } else if (bkey_cmp(w->last_pos, pos)) {
+ } else if (bkey_cmp(w->last_pos, k.k->p)) {
darray_for_each(w->inodes, i)
i->seen_this_pos = false;
}
- w->last_pos = pos;
+ w->last_pos = k.k->p;
- return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
+ return lookup_inode_for_snapshot(trans->c, w, k);
}
static int __get_visible_inodes(struct btree_trans *trans,
@@ -767,6 +787,43 @@ fsck_err:
goto out;
}
+/*
+ * Fetch the key at @pos from the dirents btree as a typed dirent key, using
+ * @iter for the lookup. Callers check the result with bkey_err().
+ */
+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
+						struct btree_iter *iter,
+						struct bpos pos)
+{
+	struct bkey_s_c_dirent dirent_k =
+		bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
+
+	return dirent_k;
+}
+
+/*
+ * Fetch the dirent that @inode's backpointer fields (bi_dir, bi_dir_offset)
+ * refer to, looked up in snapshot *@snapshot.
+ *
+ * When @inode has a nonzero bi_subvol, subvol_lookup() is first called on
+ * bi_parent_subvol and writes its snapshot result into *@snapshot, so the
+ * dirent is looked up in that snapshot instead. If subvol_lookup() fails, its
+ * error is returned as an ERR_PTR in the .k member and no dirent lookup is
+ * attempted.
+ */
+static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
+					       struct btree_iter *iter,
+					       struct bch_inode_unpacked *inode,
+					       u32 *snapshot)
+{
+	if (inode->bi_subvol) {
+		u64 unused_inum;
+		int err = subvol_lookup(trans, inode->bi_parent_subvol,
+					snapshot, &unused_inum);
+		if (err)
+			return ((struct bkey_s_c_dirent) { .k = ERR_PTR(err) });
+	}
+
+	struct bpos dirent_pos = SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot);
+
+	return dirent_get_by_pos(trans, iter, dirent_pos);
+}
+
+/*
+ * True when @inode's backpointer (bi_dir, bi_dir_offset) matches the inode and
+ * offset fields of dirent @d's key position.
+ */
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+				   struct bkey_s_c_dirent d)
+{
+	if (inode->bi_dir != d.k->p.inode)
+		return false;
+
+	return inode->bi_dir_offset == d.k->p.offset;
+}
+
+/*
+ * True when dirent @d targets @inode: a DT_SUBVOL dirent is compared by
+ * d_child_subvol against bi_subvol, any other dirent by d_inum against
+ * bi_inum.
+ */
+static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
+				   struct bch_inode_unpacked *inode)
+{
+	if (d.v->d_type == DT_SUBVOL)
+		return le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol;
+
+	return le64_to_cpu(d.v->d_inum) == inode->bi_inum;
+}
+
static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
{
struct btree_iter iter;
@@ -779,6 +836,49 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
return k.k->type == KEY_TYPE_set;
}
+/*
+ * Verify that the dirent named by @inode's backpointer (bi_dir/bi_dir_offset)
+ * exists and points back at @inode.
+ *
+ * If the dirent is missing, or does not point back, and the corresponding fsck
+ * error is to be fixed, the backpointer fields of the unpacked @inode are
+ * cleared and *@write_inode is set; the inode is not written here.
+ *
+ * @inode_k is only used for the text of the error messages.
+ *
+ * Returns a dirent-lookup error other than ENOENT directly; otherwise returns
+ * ret after cleanup, which is 0 on the normal path.
+ * NOTE(review): the fsck_err_on() macros presumably may jump to the fsck_err
+ * label with ret set - confirm against their definition.
+ */
+static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
+ struct bch_inode_unpacked *inode,
+ u32 inode_snapshot, bool *write_inode)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ struct btree_iter dirent_iter = {};
+ struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
+ int ret = bkey_err(d);
+ /* ENOENT is not fatal: it is reported below as a missing dirent */
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ return ret;
+
+ if (fsck_err_on(ret,
+ c, inode_points_to_missing_dirent,
+ "inode points to missing dirent\n%s",
+ (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
+ fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
+ c, inode_points_to_wrong_dirent,
+ "inode points to dirent that does not point back:\n%s",
+ (bch2_bkey_val_to_text(&buf, c, inode_k),
+ prt_newline(&buf),
+ bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+ /*
+ * We just clear the backpointer fields for now. If we find a
+ * dirent that points to this inode in check_dirents(), we'll
+ * update it then; then when we get to check_path() if the
+ * backpointer is still 0 we'll reattach it.
+ */
+ inode->bi_dir = 0;
+ inode->bi_dir_offset = 0;
+ inode->bi_flags &= ~BCH_INODE_backptr_untrusted;
+ *write_inode = true;
+ }
+
+ /* Drop any ENOENT left over from the dirent lookup */
+ ret = 0;
+fsck_err:
+ bch2_trans_iter_exit(trans, &dirent_iter);
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
static int check_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -923,6 +1023,22 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
+ if (u.bi_dir || u.bi_dir_offset) {
+ ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
+ if (ret)
+ goto err;
+ }
+
+ if (fsck_err_on(u.bi_parent_subvol &&
+ (u.bi_subvol == 0 ||
+ u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
+ c, inode_bi_parent_nonzero,
+ "inode %llu:%u has subvol %u but nonzero parent subvol %u",
+ u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
+ u.bi_parent_subvol = 0;
+ do_update = true;
+ }
+
if (u.bi_subvol) {
struct bch_subvolume s;
@@ -980,28 +1096,6 @@ int bch2_check_inodes(struct bch_fs *c)
return ret;
}
-static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bpos pos)
-{
- return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
-}
-
-static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
- struct bkey_s_c_dirent d)
-{
- return inode->bi_dir == d.k->p.inode &&
- inode->bi_dir_offset == d.k->p.offset;
-}
-
-static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
- struct bch_inode_unpacked *inode)
-{
- return d.v->d_type == DT_SUBVOL
- ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
- : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
-}
-
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
@@ -1310,7 +1404,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err;
}
- i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
+ i = walk_inode(trans, inode, k);
ret = PTR_ERR_OR_ZERO(i);
if (ret)
goto err;
@@ -1489,84 +1583,82 @@ fsck_err:
return ret ?: trans_was_restarted(trans, restart_count);
}
-static int check_inode_backpointer(struct btree_trans *trans,
+static int check_dirent_inode_dirent(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c_dirent d,
struct bch_inode_unpacked *target,
u32 target_snapshot)
{
struct bch_fs *c = trans->c;
- struct btree_iter bp_iter = { NULL };
struct printbuf buf = PRINTBUF;
int ret = 0;
+ if (inode_points_to_dirent(target, d))
+ return 0;
+
if (!target->bi_dir &&
!target->bi_dir_offset) {
target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset;
-
- ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
- if (ret)
- goto err;
+ return __bch2_fsck_write_inode(trans, target, target_snapshot);
}
- if (!inode_points_to_dirent(target, d)) {
- struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
- SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
- ret = bkey_err(bp_dirent);
- if (ret && !bch2_err_matches(ret, ENOENT))
- goto err;
-
- bool backpointer_exists = !ret;
- ret = 0;
-
- bch2_bkey_val_to_text(&buf, c, d.s_c);
- prt_newline(&buf);
- if (backpointer_exists)
- bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+ struct btree_iter bp_iter = { NULL };
+ struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
+ SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
+ ret = bkey_err(bp_dirent);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ goto err;
- if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
- c, inode_dir_multiple_links,
- "directory %llu:%u with multiple links\n%s",
- target->bi_inum, target_snapshot, buf.buf)) {
- ret = __remove_dirent(trans, d.k->p);
- goto out;
- }
+ bool backpointer_exists = !ret;
+ ret = 0;
+
+ if (fsck_err_on(!backpointer_exists,
+ c, inode_wrong_backpointer,
+ "inode %llu:%u has wrong backpointer:\n"
+ "got %llu:%llu\n"
+ "should be %llu:%llu",
+ target->bi_inum, target_snapshot,
+ target->bi_dir,
+ target->bi_dir_offset,
+ d.k->p.inode,
+ d.k->p.offset)) {
+ target->bi_dir = d.k->p.inode;
+ target->bi_dir_offset = d.k->p.offset;
+ ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+ goto out;
+ }
- /*
- * hardlinked file with nlink 0:
- * We're just adjusting nlink here so check_nlinks() will pick
- * it up, it ignores inodes with nlink 0
- */
- if (fsck_err_on(backpointer_exists && !target->bi_nlink,
- c, inode_multiple_links_but_nlink_0,
- "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
- target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
- target->bi_nlink++;
- target->bi_flags &= ~BCH_INODE_unlinked;
-
- ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
- if (ret)
- goto err;
- }
+ bch2_bkey_val_to_text(&buf, c, d.s_c);
+ prt_newline(&buf);
+ if (backpointer_exists)
+ bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+
+ if (fsck_err_on(backpointer_exists &&
+ (S_ISDIR(target->bi_mode) ||
+ target->bi_subvol),
+ c, inode_dir_multiple_links,
+ "%s %llu:%u with multiple links\n%s",
+ S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
+ target->bi_inum, target_snapshot, buf.buf)) {
+ ret = __remove_dirent(trans, d.k->p);
+ goto out;
+ }
- if (fsck_err_on(!backpointer_exists,
- c, inode_wrong_backpointer,
- "inode %llu:%u has wrong backpointer:\n"
- "got %llu:%llu\n"
- "should be %llu:%llu",
- target->bi_inum, target_snapshot,
- target->bi_dir,
- target->bi_dir_offset,
- d.k->p.inode,
- d.k->p.offset)) {
- target->bi_dir = d.k->p.inode;
- target->bi_dir_offset = d.k->p.offset;
-
- ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
- if (ret)
- goto err;
- }
+ /*
+ * hardlinked file with nlink 0:
+ * We're just adjusting nlink here so check_nlinks() will pick
+ * it up, it ignores inodes with nlink 0
+ */
+ if (fsck_err_on(backpointer_exists && !target->bi_nlink,
+ c, inode_multiple_links_but_nlink_0,
+ "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
+ target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
+ target->bi_nlink++;
+ target->bi_flags &= ~BCH_INODE_unlinked;
+ ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+ if (ret)
+ goto err;
}
out:
err:
@@ -1588,7 +1680,7 @@ static int check_dirent_target(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret = 0;
- ret = check_inode_backpointer(trans, iter, d, target, target_snapshot);
+ ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
if (ret)
goto err;
@@ -1606,27 +1698,12 @@ static int check_dirent_target(struct btree_trans *trans,
bkey_reassemble(&n->k_i, d.s_c);
n->v.d_type = inode_d_type(target);
-
- ret = bch2_trans_update(trans, iter, &n->k_i, 0);
- if (ret)
- goto err;
-
- d = dirent_i_to_s_c(n);
- }
-
- if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
- target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
- c, dirent_d_parent_subvol_wrong,
- "dirent has wrong d_parent_subvol field: got %u, should be %u",
- le32_to_cpu(d.v->d_parent_subvol),
- target->bi_parent_subvol)) {
- n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
- ret = PTR_ERR_OR_ZERO(n);
- if (ret)
- goto err;
-
- bkey_reassemble(&n->k_i, d.s_c);
- n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
+ if (n->v.d_type == DT_SUBVOL) {
+ n->v.d_parent_subvol = target->bi_parent_subvol;
+ n->v.d_child_subvol = target->bi_subvol;
+ } else {
+ n->v.d_inum = target->bi_inum;
+ }
ret = bch2_trans_update(trans, iter, &n->k_i, 0);
if (ret)
@@ -1641,45 +1718,113 @@ fsck_err:
return ret;
}
-static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *iter,
- struct bkey_s_c_dirent d)
+/*
+ * Find a subvolume that's a descendent of @snapshot.
+ *
+ * Scans the subvolumes btree from POS_MIN and stops at the first
+ * KEY_TYPE_subvolume key for which
+ * bch2_snapshot_is_ancestor(c, <subvolume's snapshot>, @snapshot) holds.
+ *
+ * On a match, stores the subvolume ID (the key's offset) in *@subvolid and
+ * returns the iteration status, which is 0 on this path. Returns -ENOENT if
+ * the scan finishes without a match, or the error reported by the iteration.
+ */
+static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) {
+		if (k.k->type != KEY_TYPE_subvolume)
+			continue;
+
+		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+		if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) {
+			/*
+			 * Read the key while the iterator is still live; the
+			 * iterator is released exactly once, at 'found'.
+			 */
+			*subvolid = k.k->p.offset;
+			goto found;
+		}
+	}
+	if (!ret)
+		ret = -ENOENT;
+found:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
+static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c_dirent d)
{
struct bch_fs *c = trans->c;
+ struct btree_iter subvol_iter = {};
struct bch_inode_unpacked subvol_root;
+ u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
- u32 target_snapshot;
- u64 target_inum;
+ u32 parent_snapshot;
+ u64 parent_inum;
+ struct printbuf buf = PRINTBUF;
int ret = 0;
- ret = subvol_lookup(trans, target_subvol,
- &target_snapshot, &target_inum);
+ ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
- if (fsck_err_on(ret, c, dirent_to_missing_subvol,
- "dirent points to missing subvolume %u",
- le32_to_cpu(d.v->d_child_subvol)))
- return __remove_dirent(trans, d.k->p);
+ if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
+ "dirent parent_subvol points to missing subvolume\n%s",
+ (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
+ fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot),
+ c, dirent_not_visible_in_parent_subvol,
+ "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
+ parent_snapshot,
+ (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+ u32 new_parent_subvol;
+ ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+ if (ret)
+ goto err;
- ret = lookup_inode(trans, target_inum,
- &subvol_root, &target_snapshot);
+ struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
+ ret = PTR_ERR_OR_ZERO(new_dirent);
+ if (ret)
+ goto err;
+
+ new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol);
+ }
+
+ struct bkey_s_c_subvolume s =
+ bch2_bkey_get_iter_typed(trans, &subvol_iter,
+ BTREE_ID_subvolumes, POS(0, target_subvol),
+ 0, subvolume);
+ ret = bkey_err(s.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
- if (fsck_err_on(ret, c, subvol_to_missing_root,
- "subvolume %u points to missing subvolume root %llu",
- target_subvol,
- target_inum)) {
- bch_err(c, "repair not implemented yet");
- return -EINVAL;
+ if (ret) {
+ if (fsck_err(c, dirent_to_missing_subvol,
+ "dirent points to missing subvolume\n%s",
+ (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
+ return __remove_dirent(trans, d.k->p);
+ ret = 0;
+ goto out;
}
- if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
- c, subvol_root_wrong_bi_subvol,
- "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
+ if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
+ c, subvol_fs_path_parent_wrong,
+ "subvol with wrong fs_path_parent, should be be %u\n%s",
+ parent_subvol,
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+ struct bkey_i_subvolume *n =
+ bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto err;
+
+ n->v.fs_path_parent = le32_to_cpu(parent_subvol);
+ }
+
+ u64 target_inum = le64_to_cpu(s.v->inode);
+ u32 target_snapshot = le32_to_cpu(s.v->snapshot);
+
+ ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ return ret;
+
+ if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+ c, inode_bi_parent_wrong,
+ "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
target_inum,
- subvol_root.bi_subvol, target_subvol)) {
- subvol_root.bi_subvol = target_subvol;
+ subvol_root.bi_parent_subvol, parent_subvol)) {
+ subvol_root.bi_parent_subvol = parent_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret)
return ret;
@@ -1689,7 +1834,11 @@ static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *ite
target_snapshot);
if (ret)
return ret;
+out:
+err:
fsck_err:
+ bch2_trans_iter_exit(trans, &subvol_iter);
+ printbuf_exit(&buf);
return ret;
}
@@ -1731,7 +1880,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
BUG_ON(!btree_iter_path(trans, iter)->should_be_locked);
- i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout);
+ i = walk_inode(trans, dir, k);
ret = PTR_ERR_OR_ZERO(i);
if (ret < 0)
goto err;
@@ -1777,7 +1926,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL) {
- ret = check_subvol_dirent(trans, iter, d);
+ ret = check_dirent_to_subvol(trans, iter, d);
if (ret)
goto err;
} else {
@@ -1858,7 +2007,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
return ret;
- i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
+ i = walk_inode(trans, inode, k);
ret = PTR_ERR_OR_ZERO(i);
if (ret)
return ret;
@@ -1997,62 +2146,52 @@ static int path_down(struct bch_fs *c, pathbuf *p,
*
* XXX: we should also be verifying that inodes are in the right subvolumes
*/
-static int check_path(struct btree_trans *trans,
- pathbuf *p,
- struct bch_inode_unpacked *inode,
- u32 snapshot)
+static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
+ struct btree_iter inode_iter = {};
+ struct bch_inode_unpacked inode;
+ struct printbuf buf = PRINTBUF;
+ u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot);
int ret = 0;
- snapshot = bch2_snapshot_equiv(c, snapshot);
p->nr = 0;
- while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
- inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+ BUG_ON(bch2_inode_unpack(inode_k, &inode));
+
+ while (!(inode.bi_inum == BCACHEFS_ROOT_INO &&
+ inode.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
struct btree_iter dirent_iter;
struct bkey_s_c_dirent d;
u32 parent_snapshot = snapshot;
- if (inode->bi_subvol) {
- u64 inum;
-
- ret = subvol_lookup(trans, inode->bi_parent_subvol,
- &parent_snapshot, &inum);
- if (ret)
- break;
- }
-
- d = dirent_get_by_pos(trans, &dirent_iter,
- SPOS(inode->bi_dir, inode->bi_dir_offset,
- parent_snapshot));
+ d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
ret = bkey_err(d.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
break;
- if (!ret && !dirent_points_to_inode(d, inode)) {
+ if (!ret && !dirent_points_to_inode(d, &inode)) {
bch2_trans_iter_exit(trans, &dirent_iter);
ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
}
if (bch2_err_matches(ret, ENOENT)) {
- if (fsck_err(c, inode_unreachable,
- "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
- inode->bi_inum, snapshot,
- bch2_d_type_str(inode_d_type(inode)),
- inode->bi_nlink,
- inode->bi_dir,
- inode->bi_dir_offset))
- ret = reattach_inode(trans, inode, snapshot);
- break;
+ ret = 0;
+ if (fsck_err(c, inode_unreachable,
+ "unreachable inode\n%s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, inode_k),
+ buf.buf)))
+ ret = reattach_inode(trans, &inode, snapshot);
+ goto out;
}
bch2_trans_iter_exit(trans, &dirent_iter);
- if (!S_ISDIR(inode->bi_mode))
+ if (!S_ISDIR(inode.bi_mode))
break;
- ret = path_down(c, p, inode->bi_inum, snapshot);
+ ret = path_down(c, p, inode.bi_inum, snapshot);
if (ret) {
bch_err(c, "memory allocation failure");
return ret;
@@ -2060,7 +2199,12 @@ static int check_path(struct btree_trans *trans,
snapshot = parent_snapshot;
- ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
+ bch2_trans_iter_exit(trans, &inode_iter);
+ inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
+ SPOS(0, inode.bi_dir, snapshot), 0);
+ ret = bkey_err(inode_k) ?:
+ !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
+ : bch2_inode_unpack(inode_k, &inode);
if (ret) {
/* Should have been caught in dirents pass */
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -2068,30 +2212,35 @@ static int check_path(struct btree_trans *trans,
break;
}
- if (path_is_dup(p, inode->bi_inum, snapshot)) {
+ snapshot = inode_k.k->p.snapshot;
+
+ if (path_is_dup(p, inode.bi_inum, snapshot)) {
/* XXX print path */
bch_err(c, "directory structure loop");
darray_for_each(*p, i)
pr_err("%llu:%u", i->inum, i->snapshot);
- pr_err("%llu:%u", inode->bi_inum, snapshot);
+ pr_err("%llu:%u", inode.bi_inum, snapshot);
if (!fsck_err(c, dir_loop, "directory structure loop"))
return 0;
- ret = remove_backpointer(trans, inode);
+ ret = remove_backpointer(trans, &inode);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err_msg(c, ret, "removing dirent");
if (ret)
break;
- ret = reattach_inode(trans, inode, snapshot);
+ ret = reattach_inode(trans, &inode, snapshot);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum);
+ bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
break;
}
}
+out:
fsck_err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+ printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@@ -2103,7 +2252,6 @@ fsck_err:
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
- struct bch_inode_unpacked u;
pathbuf path = { 0, };
int ret;
@@ -2116,12 +2264,10 @@ int bch2_check_directory_structure(struct bch_fs *c)
if (!bkey_is_inode(k.k))
continue;
- BUG_ON(bch2_inode_unpack(k, &u));
-
- if (u.bi_flags & BCH_INODE_unlinked)
+ if (bch2_inode_flags(k) & BCH_INODE_unlinked)
continue;
- check_path(trans, &path, &u, iter.pos.snapshot);
+ check_path(trans, &path, k);
})));
darray_exit(&path);
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index dbe37ccc..414aebe1 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -620,7 +620,8 @@ int bch2_trigger_inode(struct btree_trans *trans,
bool old_deleted = bkey_is_deleted_inode(old);
bool new_deleted = bkey_is_deleted_inode(new.s_c);
if (old_deleted != new_deleted) {
- int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted);
+ int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes,
+ new.k->p, new_deleted);
if (ret)
return ret;
}
@@ -1169,7 +1170,7 @@ fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
return ret;
delete:
- ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
goto out;
}
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 9a9353c0..05629805 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -177,6 +177,20 @@ static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode);
}
+/*
+ * Return the bi_flags field of inode key @k in CPU byte order.
+ *
+ * Handles KEY_TYPE_inode, KEY_TYPE_inode_v2 and KEY_TYPE_inode_v3; any other
+ * key type yields 0. For v2/v3 the field is converted with le64_to_cpu() and
+ * then narrowed to the u32 return type.
+ */
+static inline u32 bch2_inode_flags(struct bkey_s_c k)
+{
+	u32 flags = 0;
+
+	switch (k.k->type) {
+	case KEY_TYPE_inode:
+		flags = le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
+		break;
+	case KEY_TYPE_inode_v2:
+		flags = le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
+		break;
+	case KEY_TYPE_inode_v3:
+		flags = le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
+		break;
+	default:
+		break;
+	}
+
+	return flags;
+}
+
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index 7a4ca5a2..ed7577cd 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -44,8 +44,8 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
u64 dev_bucket, u64 time, bool set)
{
return time
- ? bch2_btree_bit_mod(trans, BTREE_ID_lru,
- lru_pos(lru_id, dev_bucket, time), set)
+ ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru,
+ lru_pos(lru_id, dev_bucket, time), set)
: 0;
}
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 9a4b7faa..f8c2341e 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -332,6 +332,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Run fsck on mount") \
+ x(fsck_memory_usage_percent, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_UINT(20, 70), \
+ BCH2_NO_SB_OPT, 50, \
+ NULL, "Maximum percentage of system ram fsck is allowed to pin")\
x(fix_errors, u8, \
OPT_FS|OPT_MOUNT, \
OPT_FN(bch2_opt_fix_errors), \
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 9127d0e3..4f8782d6 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -264,7 +264,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
bkey_copy(&r->key, (struct bkey_i *) entry->start);
r->error = 0;
} else {
- r->error = -EIO;
+ r->error = -BCH_ERR_btree_node_read_error;
}
r->alive = true;
break;
diff --git a/libbcachefs/recovery_types.h b/libbcachefs/recovery_types.h
index fa0c8efd..f0fc1dbb 100644
--- a/libbcachefs/recovery_types.h
+++ b/libbcachefs/recovery_types.h
@@ -34,6 +34,7 @@
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
+ x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
x(fs_upgrade_for_subvolumes, 22, 0) \
x(resume_logged_ops, 23, PASS_ALWAYS) \
diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c
index 626eaaea..3337419f 100644
--- a/libbcachefs/sb-downgrade.c
+++ b/libbcachefs/sb-downgrade.c
@@ -46,7 +46,13 @@
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \
x(rebalance_work, \
- BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
+ BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \
+ x(subvolume_fs_parent, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \
+ BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
+ x(btree_subvolume_children, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \
+ BCH_FSCK_ERR_subvol_children_not_set)
#define DOWNGRADE_TABLE()
diff --git a/libbcachefs/sb-errors_types.h b/libbcachefs/sb-errors_types.h
index 63f18c7f..1530bd35 100644
--- a/libbcachefs/sb-errors_types.h
+++ b/libbcachefs/sb-errors_types.h
@@ -231,7 +231,7 @@
x(dirent_name_dot_or_dotdot, 223) \
x(dirent_name_has_slash, 224) \
x(dirent_d_type_wrong, 225) \
- x(dirent_d_parent_subvol_wrong, 226) \
+ x(inode_bi_parent_wrong, 226) \
x(dirent_in_missing_dir_inode, 227) \
x(dirent_in_non_dir_inode, 228) \
x(dirent_to_missing_inode, 229) \
@@ -253,7 +253,16 @@
x(reflink_p_front_pad_bad, 245) \
x(journal_entry_dup_same_device, 246) \
x(inode_bi_subvol_missing, 247) \
- x(inode_bi_subvol_wrong, 248)
+ x(inode_bi_subvol_wrong, 248) \
+ x(inode_points_to_missing_dirent, 249) \
+ x(inode_points_to_wrong_dirent, 250) \
+ x(inode_bi_parent_nonzero, 251) \
+ x(dirent_to_missing_parent_subvol, 252) \
+ x(dirent_not_visible_in_parent_subvol, 253) \
+ x(subvol_fs_path_parent_wrong, 254) \
+ x(subvol_root_fs_path_parent_nonzero, 255) \
+ x(subvol_children_not_set, 256) \
+ x(subvol_children_bad, 257)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index e7ee52c3..ce7aed12 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -13,13 +13,26 @@
static int bch2_subvolume_delete(struct btree_trans *, u32);
+/*
+ * Position derived from subvolume key @k's parent link:
+ * POS(fs_path_parent, subvolume ID), where the subvolume ID is the key's
+ * offset. Returns POS_MIN when @k is not a KEY_TYPE_subvolume key or its
+ * fs_path_parent is zero.
+ */
+static struct bpos subvolume_children_pos(struct bkey_s_c k)
+{
+	if (k.k->type == KEY_TYPE_subvolume) {
+		struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k);
+
+		if (subvol.v->fs_path_parent)
+			return POS(le32_to_cpu(subvol.v->fs_path_parent),
+				   subvol.k->p.offset);
+	}
+
+	return POS_MIN;
+}
+
static int check_subvol(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct bkey_s_c_subvolume subvol;
+ struct btree_iter subvol_children_iter = {};
struct bch_snapshot snapshot;
+ struct printbuf buf = PRINTBUF;
unsigned snapid;
int ret = 0;
@@ -42,6 +55,42 @@ static int check_subvol(struct btree_trans *trans,
return ret ?: -BCH_ERR_transaction_restart_nested;
}
+ if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
+ subvol.v->fs_path_parent,
+ c, subvol_root_fs_path_parent_nonzero,
+ "root subvolume has nonzero fs_path_parent\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ struct bkey_i_subvolume *n =
+ bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto err;
+
+ n->v.fs_path_parent = 0;
+ }
+
+ if (subvol.v->fs_path_parent) {
+ struct bpos pos = subvolume_children_pos(k);
+
+ struct bkey_s_c subvol_children_k =
+ bch2_bkey_get_iter(trans, &subvol_children_iter,
+ BTREE_ID_subvolume_children, pos, 0);
+ ret = bkey_err(subvol_children_k);
+ if (ret)
+ goto err;
+
+ if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set,
+ c, subvol_children_not_set,
+ "subvolume not set in subvolume_children btree at %llu:%llu\n%s",
+ pos.inode, pos.offset,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true);
+ if (ret)
+ goto err;
+ }
+ }
+
struct bch_inode_unpacked inode;
struct btree_iter inode_iter = {};
ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
@@ -102,9 +151,10 @@ static int check_subvol(struct btree_trans *trans,
SET_BCH_SUBVOLUME_SNAP(&s->v, true);
}
}
-
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &subvol_children_iter);
+ printbuf_exit(&buf);
return ret;
}
@@ -119,6 +169,42 @@ int bch2_check_subvols(struct bch_fs *c)
return ret;
}
+/*
+ * Validate one key of the subvolume_children btree.
+ *
+ * A key at (inode, offset) is checked against the subvolume stored at
+ * POS(0, offset) in the subvolumes btree: the entry is considered bad when
+ * that subvolume cannot be found (ENOENT) or when its fs_path_parent differs
+ * from the key's inode field. A bad entry is reported through fsck_err_on()
+ * and, when that evaluates true, deleted via @child_iter.
+ *
+ * @trans:	btree transaction used for the lookup and the delete
+ * @child_iter:	iterator positioned on @child_k
+ * @child_k:	the subvolume_children key being checked
+ *
+ * Returns 0 on success (including when a bad entry was reported but not
+ * deleted), otherwise the error from the lookup or from the delete.
+ */
+static int check_subvol_child(struct btree_trans *trans,
+			      struct btree_iter *child_iter,
+			      struct bkey_s_c child_k)
+{
+	struct bch_fs *c = trans->c;
+	struct bch_subvolume s;
+	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, child_k.k->p.offset),
+					  0, subvolume, &s);
+	bool missing = ret && bch2_err_matches(ret, ENOENT);
+
+	if (ret && !missing)
+		return ret;
+
+	/*
+	 * A missing subvolume is a finding to report, not a failure of this
+	 * check: remember it in @missing and clear ret so that ENOENT is not
+	 * returned to the caller when the entry below is left in place.
+	 */
+	ret = 0;
+
+	/* @s is only read when the lookup succeeded (|| short-circuits) */
+	if (fsck_err_on(missing ||
+			le32_to_cpu(s.fs_path_parent) != child_k.k->p.inode,
+			c, subvol_children_bad,
+			"incorrect entry in subvolume_children btree %llu:%llu",
+			child_k.k->p.inode, child_k.k->p.offset)) {
+		ret = bch2_btree_delete_at(trans, child_iter, 0);
+		if (ret)
+			goto err;
+	}
+err:
+fsck_err:	/* presumably the jump target used inside fsck_err_on() - confirm */
+	return ret;
+}
+
+/*
+ * fsck pass over the subvolume_children btree: check_subvol_child() is run
+ * for the keys of BTREE_ID_subvolume_children starting at POS_MIN, inside a
+ * transaction set up by bch2_trans_run().
+ *
+ * Returns 0 on success, otherwise the error that ended the pass; the error
+ * is also passed to bch_err_fn() for reporting.
+ */
+int bch2_check_subvol_children(struct bch_fs *c)
+{
+	int ret = bch2_trans_run(c,
+		for_each_btree_key_commit(trans, iter,
+				BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_PREFETCH, k,
+				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+			check_subvol_child(trans, &iter, k)));
+	bch_err_fn(c, ret);
+	/*
+	 * Hand the failure back to the caller, as bch2_check_subvols() does,
+	 * instead of reporting success for a pass that did not complete.
+	 */
+	return ret;
+}
+
/* Subvolumes: */
int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
@@ -143,8 +229,50 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
le64_to_cpu(s.v->inode),
le32_to_cpu(s.v->snapshot));
- if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
- prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
+ if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, creation_parent)) {
+ prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent));
+ prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent));
+ }
+}
+
+/*
+ * Set (@set == true) or clear (@set == false) the bit at @pos in the
+ * subvolume_children btree.
+ *
+ * @pos == POS_MIN is treated as "no position": nothing is modified and 0 is
+ * returned. Otherwise the result of bch2_btree_bit_mod() is returned.
+ */
+static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set)
+{
+	if (bpos_eq(pos, POS_MIN))
+		return 0;
+
+	return bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, set);
+}
+
+/*
+ * Trigger for subvolume keys: keeps the subvolume_children btree in step
+ * with the fs_path_parent recorded in the subvolume key.
+ *
+ * Only acts when BTREE_TRIGGER_TRANSACTIONAL is set in @flags. The
+ * subvolume_children position is computed for @old and for @new; when the
+ * two differ, the old position is cleared and then the new one is set.
+ *
+ * Returns 0 on success or the first error from updating subvolume_children.
+ */
+int bch2_subvolume_trigger(struct btree_trans *trans,
+			   enum btree_id btree_id, unsigned level,
+			   struct bkey_s_c old, struct bkey_s new,
+			   unsigned flags)
+{
+	if (!(flags & BTREE_TRIGGER_TRANSACTIONAL))
+		return 0;
+
+	struct bpos old_pos = subvolume_children_pos(old);
+	struct bpos new_pos = subvolume_children_pos(new.s_c);
+
+	/* Same slot before and after: nothing to move */
+	if (bpos_eq(old_pos, new_pos))
+		return 0;
+
+	int ret = subvolume_children_mod(trans, old_pos, false);
+	if (ret)
+		return ret;
+
+	return subvolume_children_mod(trans, new_pos, true);
+}
+
+/*
+ * Check whether subvolume @subvol has any entry in the subvolume_children
+ * btree.
+ *
+ * Peeks the first key at or after POS(subvol, 0); the subvolume counts as
+ * having children when a key is found whose inode field equals @subvol.
+ *
+ * Returns the peek error if there was one,
+ * -BCH_ERR_ENOTEMPTY_subvol_not_empty when such a key exists, 0 otherwise.
+ */
+int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
+{
+ struct btree_iter iter;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
+ struct bkey_s_c k = bch2_btree_iter_peek(&iter);
+ bch2_trans_iter_exit(trans, &iter);
+
+ /*
+  * NOTE(review): k is inspected after the iterator has been released -
+  * confirm that k.k is still valid to dereference at this point.
+  * The expression groups as: bkey_err(k) ?: ((k.k && ...) ? ... : 0)
+  */
+ return bkey_err(k) ?: k.k && k.k->p.inode == subvol
+ ? -BCH_ERR_ENOTEMPTY_subvol_not_empty
+ : 0;
}
static __always_inline int
@@ -228,8 +356,8 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_subvolume)
return 0;
- if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
- le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
+ if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, creation_parent) &&
+ le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent)
return 0;
s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
@@ -237,7 +365,7 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
if (ret)
return ret;
- s->v.parent = cpu_to_le32(new_parent);
+ s->v.creation_parent = cpu_to_le32(new_parent);
return 0;
}
@@ -260,7 +388,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_subvolume_reparent(trans, &iter, k,
- subvolid_to_delete, le32_to_cpu(s.parent)));
+ subvolid_to_delete, le32_to_cpu(s.creation_parent)));
}
/*
@@ -391,6 +519,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
}
int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
+ u32 parent_subvolid,
u32 src_subvolid,
u32 *new_subvolid,
u32 *new_snapshotid,
@@ -447,12 +576,13 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
if (ret)
goto err;
- new_subvol->v.flags = 0;
- new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
- new_subvol->v.inode = cpu_to_le64(inode);
- new_subvol->v.parent = cpu_to_le32(src_subvolid);
- new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
- new_subvol->v.otime.hi = 0;
+ new_subvol->v.flags = 0;
+ new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
+ new_subvol->v.inode = cpu_to_le64(inode);
+ new_subvol->v.creation_parent = cpu_to_le32(src_subvolid);
+ new_subvol->v.fs_path_parent = cpu_to_le32(parent_subvolid);
+ new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
+ new_subvol->v.otime.hi = 0;
SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h
index 3ca1d183..4045a180 100644
--- a/libbcachefs/subvolume.h
+++ b/libbcachefs/subvolume.h
@@ -7,17 +7,22 @@
enum bkey_invalid_flags;
int bch2_check_subvols(struct bch_fs *);
+int bch2_check_subvol_children(struct bch_fs *);
int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \
.key_invalid = bch2_subvolume_invalid, \
.val_to_text = bch2_subvolume_to_text, \
+ .trigger = bch2_subvolume_trigger, \
.min_val_size = 16, \
})
+int bch2_subvol_has_children(struct btree_trans *, u32);
int bch2_subvolume_get(struct btree_trans *, unsigned,
bool, int, struct bch_subvolume *);
int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
@@ -29,8 +34,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);
int bch2_subvolume_unlink(struct btree_trans *, u32);
-int bch2_subvolume_create(struct btree_trans *, u64, u32,
- u32 *, u32 *, bool);
+int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
int bch2_fs_subvolumes_init(struct bch_fs *);
diff --git a/libbcachefs/subvolume_format.h b/libbcachefs/subvolume_format.h
index af79134b..e029df7b 100644
--- a/libbcachefs/subvolume_format.h
+++ b/libbcachefs/subvolume_format.h
@@ -19,8 +19,8 @@ struct bch_subvolume {
* This is _not_ necessarily the subvolume of the directory containing
* this subvolume:
*/
- __le32 parent;
- __le32 pad;
+ __le32 creation_parent;
+ __le32 fs_path_parent;
bch_le128 otime;
};
diff --git a/linux/mean_and_variance.c b/linux/mean_and_variance.c
index b93d150d..21ec6afc 100644
--- a/linux/mean_and_variance.c
+++ b/linux/mean_and_variance.c
@@ -102,6 +102,8 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
* mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
* @s: mean and variance number of samples and their sums
* @x: new value to include in the &mean_and_variance_weighted
+ * @initted: caller must track whether this is the first use or not
+ * @weight: ewma weight
*
* see linked pdf: function derived from equations 140-143 where alpha = 2^w.
* values are stored bitshifted for performance and added precision.
@@ -132,6 +134,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
/**
* mean_and_variance_weighted_get_mean() - get mean from @s
* @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
*/
s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
u8 weight)
@@ -143,6 +146,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
/**
* mean_and_variance_weighted_get_variance() -- get variance from @s
* @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
*/
u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
u8 weight)
@@ -155,6 +159,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
/**
* mean_and_variance_weighted_get_stddev() - get standard deviation from @s
* @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
*/
u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
u8 weight)