author    Kent Overstreet <kent.overstreet@linux.dev>  2022-10-22 13:25:25 -0400
committer Kent Overstreet <kent.overstreet@linux.dev>  2022-10-22 14:41:54 -0400
commit    188b6d0c8ef1c02462a744b176557c27220112c9 (patch)
tree      ab1f239b3bad0335e1d3fa62415bc8d845ba68be
parent    494421ee6e85514f90bb316d77e1dd4f7dad3420 (diff)
Update bcachefs sources to cd779e0cc5 bcachefs: Skip inode unpack/pack in bch2_extent_update() (tag: v0.23)
-rw-r--r--  .bcachefs_revision                  |   2
-rw-r--r--  cmd_migrate.c                       |   2
-rw-r--r--  include/linux/bug.h                 |   1
-rw-r--r--  include/linux/prefetch.h            |   3
-rw-r--r--  include/linux/string.h              |   1
-rw-r--r--  libbcachefs/acl.c                   |   2
-rw-r--r--  libbcachefs/alloc_background.c      |  98
-rw-r--r--  libbcachefs/alloc_foreground.c      |  46
-rw-r--r--  libbcachefs/backpointers.c          |  10
-rw-r--r--  libbcachefs/bbpos.h                 |   2
-rw-r--r--  libbcachefs/bcachefs.h              |   3
-rw-r--r--  libbcachefs/bcachefs_format.h       |  75
-rw-r--r--  libbcachefs/bkey.c                  | 108
-rw-r--r--  libbcachefs/bkey.h                  |  99
-rw-r--r--  libbcachefs/bkey_buf.h              |   1
-rw-r--r--  libbcachefs/bkey_cmp.h              | 129
-rw-r--r--  libbcachefs/bkey_methods.c          |   1
-rw-r--r--  libbcachefs/bkey_sort.c             |   3
-rw-r--r--  libbcachefs/bset.c                  |   9
-rw-r--r--  libbcachefs/bset.h                  |  94
-rw-r--r--  libbcachefs/btree_cache.c           |  97
-rw-r--r--  libbcachefs/btree_cache.h           |   3
-rw-r--r--  libbcachefs/btree_gc.c              |   2
-rw-r--r--  libbcachefs/btree_iter.c            | 106
-rw-r--r--  libbcachefs/btree_iter.h            |  30
-rw-r--r--  libbcachefs/btree_key_cache.c       |  41
-rw-r--r--  libbcachefs/btree_locking.c         |   4
-rw-r--r--  libbcachefs/btree_types.h           |  13
-rw-r--r--  libbcachefs/btree_update_interior.c |   2
-rw-r--r--  libbcachefs/btree_update_leaf.c     |  64
-rw-r--r--  libbcachefs/buckets.c               |  21
-rw-r--r--  libbcachefs/buckets.h               |  12
-rw-r--r--  libbcachefs/checksum.c              |   2
-rw-r--r--  libbcachefs/compress.c              |   2
-rw-r--r--  libbcachefs/data_update.c           |   2
-rw-r--r--  libbcachefs/debug.c                 |   8
-rw-r--r--  libbcachefs/dirent.c                |   2
-rw-r--r--  libbcachefs/extents.c               |   4
-rw-r--r--  libbcachefs/fs-common.c             |   4
-rw-r--r--  libbcachefs/fs-io.c                 |   8
-rw-r--r--  libbcachefs/fs.c                    |   7
-rw-r--r--  libbcachefs/fsck.c                  |   3
-rw-r--r--  libbcachefs/inode.c                 | 165
-rw-r--r--  libbcachefs/inode.h                 |  25
-rw-r--r--  libbcachefs/io.c                    |  84
-rw-r--r--  libbcachefs/journal.c               |  12
-rw-r--r--  libbcachefs/journal.h               |  23
-rw-r--r--  libbcachefs/journal_reclaim.c       |   4
-rw-r--r--  libbcachefs/journal_sb.c            |   4
-rw-r--r--  libbcachefs/keylist.c               |   1
-rw-r--r--  libbcachefs/move.c                  |   4
-rw-r--r--  libbcachefs/recovery.c              |  11
-rw-r--r--  libbcachefs/replicas.h              |   1
-rw-r--r--  libbcachefs/siphash.c               |   2
-rw-r--r--  libbcachefs/super-io.c              |   6
-rw-r--r--  libbcachefs/super.c                 |  37
-rw-r--r--  libbcachefs/sysfs.c                 |  17
-rw-r--r--  libbcachefs/util.c                  |   4
-rw-r--r--  linux/string.c                      |  27
-rw-r--r--  linux/string_helpers.c              |   2
60 files changed, 1033 insertions(+), 522 deletions(-)
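
The headline change is the v3 inode format introduced further down in bcachefs_format.h: bi_journal_seq, bi_hash_seed, bi_flags, bi_sectors, bi_size and bi_version now sit at fixed offsets ahead of the varint-packed fields, which is what lets bch2_extent_update() skip the full unpack/pack cycle named in the commit title. A sketch of the kind of update this layout enables (hypothetical helper name; the real path lives in libbcachefs/io.c and inode.c, only partially shown here):

	/* with the hot fields at fixed offsets, an i_size/i_sectors update
	 * is a few in-place stores instead of unpack -> modify -> repack: */
	static void inode_v3_account_extent(struct bch_inode_v3 *inode,
					    s64 sectors_delta, u64 new_size,
					    u64 journal_seq)
	{
		le64_add_cpu(&inode->bi_sectors, sectors_delta);
		if (new_size > le64_to_cpu(inode->bi_size))
			inode->bi_size = cpu_to_le64(new_size);
		inode->bi_journal_seq = cpu_to_le64(journal_seq);
	}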
diff --git a/.bcachefs_revision b/.bcachefs_revision
index da204b90..e8327f59 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-44be8c1da2e1d4edb23d5dcf3b522971c245c3f6
+cd779e0cc51cb232d17eec4537cb4769af202b5f
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 9e8ceeff..3ba51c0c 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -122,7 +122,7 @@ static void update_inode(struct bch_fs *c,
struct bkey_inode_buf packed;
int ret;
- bch2_inode_pack(c, &packed, inode);
+ bch2_inode_pack(&packed, inode);
packed.inode.k.p.snapshot = U32_MAX;
ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
NULL, NULL, 0);
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 957d4087..1a10f7e6 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -2,6 +2,7 @@
#define __TOOLS_LINUX_BUG_H
#include <assert.h>
+#include <stdio.h>
#include <linux/compiler.h>
#ifdef CONFIG_VALGRIND
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index 13cb826d..b14fbe93 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -4,4 +4,7 @@
#define prefetch(p) \
({ __maybe_unused typeof(p) __var = (p); })
+#define prefetchw(p) \
+ ({ __maybe_unused typeof(p) __var = (p); })
+
#endif /* _LINUX_PREFETCH_H */
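
Like the existing prefetch() stub, the new prefetchw() is a statement expression that evaluates its argument exactly once (for side effects and type checking) and then discards it, so kernel code compiles unchanged in this userspace build:

	/* the argument is still evaluated, exactly once: */
	prefetchw(&buf[i++]);	/* i advances once; no actual prefetch happens */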
diff --git a/include/linux/string.h b/include/linux/string.h
index b5e00a09..3ceda3a3 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -6,6 +6,7 @@
#include <linux/types.h> /* for size_t */
extern size_t strlcpy(char *dest, const char *src, size_t size);
+extern ssize_t strscpy(char *dest, const char *src, size_t count);
extern char *strim(char *);
extern void memzero_explicit(void *, size_t);
int match_string(const char * const *, size_t, const char *);
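
The matching definition lands in linux/string.c (the 27 lines added there are not shown in this view). A minimal shim with the usual strscpy contract, copy at most count - 1 bytes, always NUL-terminate, return the copied length or -E2BIG on truncation, might look like the following; treat it as a sketch, not the exact code added:

	#include <errno.h>
	#include <string.h>
	#include <sys/types.h>

	ssize_t strscpy(char *dest, const char *src, size_t count)
	{
		size_t len;

		if (!count)
			return -E2BIG;

		len = strnlen(src, count - 1);
		memcpy(dest, src, len);
		dest[len] = '\0';

		/* truncated iff src continues past what was copied */
		return src[len] ? -E2BIG : (ssize_t) len;
	}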
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 5c6ccf68..9592541f 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -173,7 +173,7 @@ bch2_acl_to_xattr(struct btree_trans *trans,
bkey_xattr_init(&xattr->k_i);
xattr->k.u64s = u64s;
xattr->v.x_type = acl_to_xattr_type(type);
- xattr->v.x_name_len = 0,
+ xattr->v.x_name_len = 0;
xattr->v.x_val_len = cpu_to_le16(acl_len);
acl_header = xattr_val(&xattr->v);
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index d0d7690a..796b9f5a 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -210,31 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret;
}
-struct bkey_i_alloc_v4 *
-bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
- struct bpos pos)
-{
- struct bkey_s_c k;
- struct bkey_i_alloc_v4 *a;
- int ret;
-
- bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
- BTREE_ITER_WITH_UPDATES|
- BTREE_ITER_CACHED|
- BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(iter);
- ret = bkey_err(k);
- if (ret) {
- bch2_trans_iter_exit(trans, iter);
- return ERR_PTR(ret);
- }
-
- a = bch2_alloc_to_v4_mut(trans, k);
- if (IS_ERR(a))
- bch2_trans_iter_exit(trans, iter);
- return a;
-}
-
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
{
unsigned i, bytes = offsetof(struct bch_alloc, data);
@@ -475,12 +450,13 @@ void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
}
}
-struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+static noinline struct bkey_i_alloc_v4 *
+__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
+ struct bkey_i_alloc_v4 *ret;
unsigned bytes = k.k->type == KEY_TYPE_alloc_v4
? bkey_bytes(k.k)
: sizeof(struct bkey_i_alloc_v4);
- struct bkey_i_alloc_v4 *ret;
/*
* Reserve space for one more backpointer here:
@@ -491,20 +467,18 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
return ret;
if (k.k->type == KEY_TYPE_alloc_v4) {
- bkey_reassemble(&ret->k_i, k);
+ struct bch_backpointer *src, *dst;
- if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) {
- struct bch_backpointer *src, *dst;
+ bkey_reassemble(&ret->k_i, k);
- src = alloc_v4_backpointers(&ret->v);
- SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
- dst = alloc_v4_backpointers(&ret->v);
+ src = alloc_v4_backpointers(&ret->v);
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
+ dst = alloc_v4_backpointers(&ret->v);
- memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
- sizeof(struct bch_backpointer));
- memset(src, 0, dst - src);
- set_alloc_v4_u64s(ret);
- }
+ memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
+ sizeof(struct bch_backpointer));
+ memset(src, 0, dst - src);
+ set_alloc_v4_u64s(ret);
} else {
bkey_alloc_v4_init(&ret->k_i);
ret->k.p = k.k->p;
@@ -513,6 +487,54 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
return ret;
}
+static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
+{
+ if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
+ BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) == BCH_ALLOC_V4_U64s) {
+ /*
+ * Reserve space for one more backpointer here:
+ * Not sketchy at doing it this way, nope...
+ */
+ struct bkey_i_alloc_v4 *ret =
+ bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(struct bch_backpointer));
+ if (!IS_ERR(ret))
+ bkey_reassemble(&ret->k_i, k);
+ return ret;
+ }
+
+ return __bch2_alloc_to_v4_mut(trans, k);
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+ return bch2_alloc_to_v4_mut_inlined(trans, k);
+}
+
+struct bkey_i_alloc_v4 *
+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
+ struct bpos pos)
+{
+ struct bkey_s_c k;
+ struct bkey_i_alloc_v4 *a;
+ int ret;
+
+ bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
+ BTREE_ITER_WITH_UPDATES|
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret) {
+ bch2_trans_iter_exit(trans, iter);
+ return ERR_PTR(ret);
+ }
+
+ a = bch2_alloc_to_v4_mut_inlined(trans, k);
+ if (IS_ERR(a))
+ bch2_trans_iter_exit(trans, iter);
+ return a;
+}
+
int bch2_alloc_read(struct bch_fs *c)
{
struct btree_trans trans;
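
The reshuffle above is a standard hot-path split: the common case (an alloc_v4 key whose backpointers already start at BCH_ALLOC_V4_U64s) is handled in a small always-inlined helper, the rare reformatting case is pushed out of line, and an out-of-line wrapper is kept for external callers. The shape of the pattern, with hypothetical names rather than the bcachefs API:

	static noinline int frob_slowpath(struct obj *o);	/* rare, bulky */

	static inline int frob_fastpath(struct obj *o)
	{
		if (likely(o->already_in_new_format))
			return 0;			/* hot path stays tiny */
		return frob_slowpath(o);
	}

	int frob(struct obj *o)		/* exported out-of-line copy */
	{
		return frob_fastpath(o);
	}

Moving bch2_trans_start_alloc_update() below the helper is what lets it call bch2_alloc_to_v4_mut_inlined() directly, since the inlined definition has to be visible at the call site.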
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index e89999cf..a9e0c739 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -489,16 +489,16 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
* bch_bucket_alloc - allocate a single bucket from a specific device
*
* Returns index of bucket on success, 0 on failure
- * */
+ */
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
bool may_alloc_partial,
- struct closure *cl)
+ struct closure *cl,
+ struct bch_dev_usage *usage)
{
struct bch_fs *c = trans->c;
struct open_bucket *ob = NULL;
- struct bch_dev_usage usage;
bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
u64 avail;
@@ -509,16 +509,16 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
u64 skipped_nouse = 0;
bool waiting = false;
again:
- usage = bch2_dev_usage_read(ca);
- avail = dev_buckets_free(ca, usage, reserve);
+ bch2_dev_usage_read_fast(ca, usage);
+ avail = dev_buckets_free(ca, *usage, reserve);
- if (usage.d[BCH_DATA_need_discard].buckets > avail)
+ if (usage->d[BCH_DATA_need_discard].buckets > avail)
bch2_do_discards(c);
- if (usage.d[BCH_DATA_need_gc_gens].buckets > avail)
+ if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
bch2_do_gc_gens(c);
- if (should_invalidate_buckets(ca, usage))
+ if (should_invalidate_buckets(ca, *usage))
bch2_do_invalidates(c);
if (!avail) {
@@ -577,10 +577,10 @@ err:
if (!IS_ERR(ob))
trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve],
may_alloc_partial, ob->bucket);
- else
+ else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
trace_and_count(c, bucket_alloc_fail,
ca, bch2_alloc_reserves[reserve],
- usage.d[BCH_DATA_free].buckets,
+ usage->d[BCH_DATA_free].buckets,
avail,
bch2_copygc_wait_amount(c),
c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
@@ -599,11 +599,12 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
bool may_alloc_partial,
struct closure *cl)
{
+ struct bch_dev_usage usage;
struct open_bucket *ob;
bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
- may_alloc_partial, cl)));
+ may_alloc_partial, cl, &usage)));
return ob;
}
@@ -630,8 +631,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
return ret;
}
-void bch2_dev_stripe_increment(struct bch_dev *ca,
- struct dev_stripe_state *stripe)
+static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
+ struct dev_stripe_state *stripe,
+ struct bch_dev_usage *usage)
{
u64 *v = stripe->next_alloc + ca->dev_idx;
u64 free_space = dev_buckets_available(ca, RESERVE_none);
@@ -650,6 +652,15 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
*v = *v < scale ? 0 : *v - scale;
}
+void bch2_dev_stripe_increment(struct bch_dev *ca,
+ struct dev_stripe_state *stripe)
+{
+ struct bch_dev_usage usage;
+
+ bch2_dev_usage_read_fast(ca, &usage);
+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
+}
+
#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0)
#define BUCKET_ALLOC_USE_DURABILITY (1 << 1)
@@ -694,6 +705,7 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
BUG_ON(*nr_effective >= nr_replicas);
for (i = 0; i < devs_sorted.nr; i++) {
+ struct bch_dev_usage usage;
struct open_bucket *ob;
dev = devs_sorted.devs[i];
@@ -713,9 +725,9 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
}
ob = bch2_bucket_alloc_trans(trans, ca, reserve,
- flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
+ flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
if (!IS_ERR(ob))
- bch2_dev_stripe_increment(ca, stripe);
+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
percpu_ref_put(&ca->ref);
if (IS_ERR(ob)) {
@@ -1110,7 +1122,7 @@ restart_find_oldest:
hlist_add_head_rcu(&wp->node, head);
mutex_unlock(&c->write_points_hash_lock);
out:
- wp->last_used = sched_clock();
+ wp->last_used = local_clock();
return wp;
}
@@ -1356,7 +1368,7 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
wp < c->write_points + c->write_points_nr; wp++) {
writepoint_init(wp, BCH_DATA_user);
- wp->last_used = sched_clock();
+ wp->last_used = local_clock();
wp->write_point = (unsigned long) wp;
hlist_add_head_rcu(&wp->node,
writepoint_hash(c, wp->write_point));
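
The running theme in alloc_foreground.c is "aggregate the device usage once, reuse it everywhere": bch2_bucket_alloc_trans() now fills a caller-supplied struct bch_dev_usage, and bch2_bucket_alloc_set_trans() feeds that same snapshot to the stripe accounting instead of letting bch2_dev_stripe_increment() re-sum the percpu counters. Condensed from the hunks above:

	/* one percpu aggregation, two consumers: */
	struct bch_dev_usage usage;
	struct open_bucket *ob;

	ob = bch2_bucket_alloc_trans(trans, ca, reserve,
				     flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
	if (!IS_ERR(ob))
		bch2_dev_stripe_increment_inlined(ca, stripe, &usage);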
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index a537768c..d74de1df 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -535,7 +535,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
if (bp.level == c->btree_roots[bp.btree_id].level + 1)
k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
- if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+ if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
return k;
bch2_trans_iter_exit(trans, iter);
@@ -585,12 +585,12 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
if (IS_ERR(b))
goto err;
- if (extent_matches_bp(c, bp.btree_id, bp.level,
- bkey_i_to_s_c(&b->key),
- bucket, bp))
+ if (b && extent_matches_bp(c, bp.btree_id, bp.level,
+ bkey_i_to_s_c(&b->key),
+ bucket, bp))
return b;
- if (btree_node_will_make_reachable(b)) {
+ if (b && btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
backpointer_not_found(trans, bucket, bp_offset, bp,
diff --git a/libbcachefs/bbpos.h b/libbcachefs/bbpos.h
index eaf3dbf0..1fbed1f8 100644
--- a/libbcachefs/bbpos.h
+++ b/libbcachefs/bbpos.h
@@ -2,6 +2,8 @@
#ifndef _BCACHEFS_BBPOS_H
#define _BCACHEFS_BBPOS_H
+#include "bkey_methods.h"
+
struct bbpos {
enum btree_id btree;
struct bpos pos;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index ccac2a3f..33186fa8 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -107,7 +107,7 @@
*
* BTREE NODES:
*
- * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
* free smaller than a bucket - so, that's how big our btree nodes are.
*
* (If buckets are really big we'll only use part of the bucket for a btree node
@@ -930,7 +930,6 @@ struct bch_fs {
struct time_stats times[BCH_TIME_STAT_NR];
- const char *btree_transaction_fns[BCH_TRANSACTIONS_NR];
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
};
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 9e10fc83..bfcb75a3 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -336,7 +336,7 @@ static inline void bkey_init(struct bkey *k)
* number.
*
* - WHITEOUT: for hash table btrees
-*/
+ */
#define BCH_BKEY_TYPES() \
x(deleted, 0) \
x(whiteout, 1) \
@@ -366,7 +366,8 @@ static inline void bkey_init(struct bkey *k)
x(set, 25) \
x(lru, 26) \
x(alloc_v4, 27) \
- x(backpointer, 28)
+ x(backpointer, 28) \
+ x(inode_v3, 29)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@@ -717,6 +718,21 @@ struct bch_inode_v2 {
__u8 fields[0];
} __attribute__((packed, aligned(8)));
+struct bch_inode_v3 {
+ struct bch_val v;
+
+ __le64 bi_journal_seq;
+ __le64 bi_hash_seed;
+ __le64 bi_flags;
+ __le64 bi_sectors;
+ __le64 bi_size;
+ __le64 bi_version;
+ __u8 fields[0];
+} __attribute__((packed, aligned(8)));
+
+#define INODEv3_FIELDS_START_INITIAL 6
+#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
+
struct bch_inode_generation {
struct bch_val v;
@@ -728,7 +744,7 @@ struct bch_inode_generation {
* bi_subvol and bi_parent_subvol are only set for subvolume roots:
*/
-#define BCH_INODE_FIELDS() \
+#define BCH_INODE_FIELDS_v2() \
x(bi_atime, 96) \
x(bi_ctime, 96) \
x(bi_mtime, 96) \
@@ -755,6 +771,31 @@ struct bch_inode_generation {
x(bi_subvol, 32) \
x(bi_parent_subvol, 32)
+#define BCH_INODE_FIELDS_v3() \
+ x(bi_atime, 96) \
+ x(bi_ctime, 96) \
+ x(bi_mtime, 96) \
+ x(bi_otime, 96) \
+ x(bi_uid, 32) \
+ x(bi_gid, 32) \
+ x(bi_nlink, 32) \
+ x(bi_generation, 32) \
+ x(bi_dev, 32) \
+ x(bi_data_checksum, 8) \
+ x(bi_compression, 8) \
+ x(bi_project, 32) \
+ x(bi_background_compression, 8) \
+ x(bi_data_replicas, 8) \
+ x(bi_promote_target, 16) \
+ x(bi_foreground_target, 16) \
+ x(bi_background_target, 16) \
+ x(bi_erasure_code, 16) \
+ x(bi_fields_set, 16) \
+ x(bi_dir, 64) \
+ x(bi_dir_offset, 64) \
+ x(bi_subvol, 32) \
+ x(bi_parent_subvol, 32)
+
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
x(data_checksum, 8) \
@@ -780,16 +821,16 @@ enum {
* User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
* flags)
*/
- __BCH_INODE_SYNC = 0,
- __BCH_INODE_IMMUTABLE = 1,
- __BCH_INODE_APPEND = 2,
- __BCH_INODE_NODUMP = 3,
- __BCH_INODE_NOATIME = 4,
+ __BCH_INODE_SYNC = 0,
+ __BCH_INODE_IMMUTABLE = 1,
+ __BCH_INODE_APPEND = 2,
+ __BCH_INODE_NODUMP = 3,
+ __BCH_INODE_NOATIME = 4,
- __BCH_INODE_I_SIZE_DIRTY= 5,
- __BCH_INODE_I_SECTORS_DIRTY= 6,
- __BCH_INODE_UNLINKED = 7,
- __BCH_INODE_BACKPTR_UNTRUSTED = 8,
+ __BCH_INODE_I_SIZE_DIRTY = 5,
+ __BCH_INODE_I_SECTORS_DIRTY = 6,
+ __BCH_INODE_UNLINKED = 7,
+ __BCH_INODE_BACKPTR_UNTRUSTED = 8,
/* bits 20+ reserved for packed fields below: */
};
@@ -811,6 +852,13 @@ LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
+LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
+LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_FIELDS_START,
+ struct bch_inode_v3, bi_flags, 31, 36);
+LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
+
/* Dirents */
/*
@@ -1494,7 +1542,8 @@ struct bch_sb_field_journal_seq_blacklist {
x(freespace, 19) \
x(alloc_v4, 20) \
x(new_data_types, 21) \
- x(backpointers, 22)
+ x(backpointers, 22) \
+ x(inode_v3, 23)
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
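
INODEv3_FIELDS_START records, in u64s from the start of the value, where the varint-packed fields begin, so a later version can grow the fixed area without a new key type. Assuming struct bch_val is zero-sized (as it is elsewhere in bcachefs), the constants above are consistent with the struct layout, which a compile-time check makes explicit:

	/* six fixed __le64s precede the packed area: 48 bytes, 6 u64s */
	_Static_assert(offsetof(struct bch_inode_v3, fields) == 6 * sizeof(__u64),
		       "fixed v3 inode fields occupy 48 bytes");
	_Static_assert(INODEv3_FIELDS_START_CUR == INODEv3_FIELDS_START_INITIAL,
		       "current layout matches the initial FIELDS_START of 6");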
diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c
index d348175e..f7e5d0c3 100644
--- a/libbcachefs/bkey.c
+++ b/libbcachefs/bkey.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "bkey.h"
+#include "bkey_cmp.h"
#include "bkey_methods.h"
#include "bset.h"
#include "util.h"
@@ -763,50 +764,6 @@ unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
#ifdef CONFIG_X86_64
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
- unsigned nr_key_bits)
-{
- long d0, d1, d2, d3;
- int cmp;
-
- /* we shouldn't need asm for this, but gcc is being retarded: */
-
- asm(".intel_syntax noprefix;"
- "xor eax, eax;"
- "xor edx, edx;"
- "1:;"
- "mov r8, [rdi];"
- "mov r9, [rsi];"
- "sub ecx, 64;"
- "jl 2f;"
-
- "cmp r8, r9;"
- "jnz 3f;"
-
- "lea rdi, [rdi - 8];"
- "lea rsi, [rsi - 8];"
- "jmp 1b;"
-
- "2:;"
- "not ecx;"
- "shr r8, 1;"
- "shr r9, 1;"
- "shr r8, cl;"
- "shr r9, cl;"
- "cmp r8, r9;"
-
- "3:\n"
- "seta al;"
- "setb dl;"
- "sub eax, edx;"
- ".att_syntax prefix;"
- : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
- : "0" (l), "1" (r), "3" (nr_key_bits)
- : "r8", "r9", "cc", "memory");
-
- return cmp;
-}
-
#define I(_x) (*(out)++ = (_x))
#define I1(i0) I(i0)
#define I2(i0, i1) (I1(i0), I(i1))
@@ -1037,40 +994,6 @@ int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
}
#else
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
- unsigned nr_key_bits)
-{
- u64 l_v, r_v;
-
- if (!nr_key_bits)
- return 0;
-
- /* for big endian, skip past header */
- nr_key_bits += high_bit_offset;
- l_v = *l & (~0ULL >> high_bit_offset);
- r_v = *r & (~0ULL >> high_bit_offset);
-
- while (1) {
- if (nr_key_bits < 64) {
- l_v >>= 64 - nr_key_bits;
- r_v >>= 64 - nr_key_bits;
- nr_key_bits = 0;
- } else {
- nr_key_bits -= 64;
- }
-
- if (!nr_key_bits || l_v != r_v)
- break;
-
- l = next_word(l);
- r = next_word(r);
-
- l_v = *l;
- r_v = *r;
- }
-
- return cmp_int(l_v, r_v);
-}
#endif
__pure
@@ -1078,19 +1001,7 @@ int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
const struct bkey_packed *r,
const struct btree *b)
{
- const struct bkey_format *f = &b->format;
- int ret;
-
- EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
- EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
-
- ret = __bkey_cmp_bits(high_word(f, l),
- high_word(f, r),
- b->nr_key_bits);
-
- EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
- bkey_unpack_pos(b, r)));
- return ret;
+ return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
}
__pure __flatten
@@ -1106,20 +1017,7 @@ int bch2_bkey_cmp_packed(const struct btree *b,
const struct bkey_packed *l,
const struct bkey_packed *r)
{
- struct bkey unpacked;
-
- if (likely(bkey_packed(l) && bkey_packed(r)))
- return __bch2_bkey_cmp_packed_format_checked(l, r, b);
-
- if (bkey_packed(l)) {
- __bkey_unpack_key_format_checked(b, &unpacked, l);
- l = (void*) &unpacked;
- } else if (bkey_packed(r)) {
- __bkey_unpack_key_format_checked(b, &unpacked, r);
- r = (void*) &unpacked;
- }
-
- return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+ return bch2_bkey_cmp_packed_inlined(b, l, r);
}
__pure __flatten
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index df9fb859..19b59ffe 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -5,6 +5,7 @@
#include <linux/bug.h>
#include "bcachefs_format.h"
+#include "btree_types.h"
#include "util.h"
#include "vstructs.h"
@@ -134,8 +135,9 @@ int bkey_cmp_left_packed(const struct btree *b,
}
/*
- * we prefer to pass bpos by ref, but it's often enough terribly convenient to
- * pass it by by val... as much as I hate c++, const ref would be nice here:
+ * The compiler generates better code when we pass bpos by ref, but it's often
+ * enough terribly convenient to pass it by val... as much as I hate c++, const
+ * ref would be nice here:
*/
__pure __flatten
static inline int bkey_cmp_left_packed_byval(const struct btree *b,
@@ -356,6 +358,99 @@ void bch2_bkey_unpack(const struct btree *, struct bkey_i *,
bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *,
const struct bkey_format *);
+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
+
+static inline void
+__bkey_unpack_key_format_checked(const struct btree *b,
+ struct bkey *dst,
+ const struct bkey_packed *src)
+{
+ if (IS_ENABLED(HAVE_BCACHEFS_COMPILED_UNPACK)) {
+ compiled_unpack_fn unpack_fn = b->aux_data;
+ unpack_fn(dst, src);
+
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+ bch2_expensive_debug_checks) {
+ struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
+
+ BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
+ }
+ } else {
+ *dst = __bch2_bkey_unpack_key(&b->format, src);
+ }
+}
+
+static inline struct bkey
+bkey_unpack_key_format_checked(const struct btree *b,
+ const struct bkey_packed *src)
+{
+ struct bkey dst;
+
+ __bkey_unpack_key_format_checked(b, &dst, src);
+ return dst;
+}
+
+static inline void __bkey_unpack_key(const struct btree *b,
+ struct bkey *dst,
+ const struct bkey_packed *src)
+{
+ if (likely(bkey_packed(src)))
+ __bkey_unpack_key_format_checked(b, dst, src);
+ else
+ *dst = *packed_to_bkey_c(src);
+}
+
+/**
+ * bkey_unpack_key -- unpack just the key, not the value
+ */
+static inline struct bkey bkey_unpack_key(const struct btree *b,
+ const struct bkey_packed *src)
+{
+ return likely(bkey_packed(src))
+ ? bkey_unpack_key_format_checked(b, src)
+ : *packed_to_bkey_c(src);
+}
+
+static inline struct bpos
+bkey_unpack_pos_format_checked(const struct btree *b,
+ const struct bkey_packed *src)
+{
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+ return bkey_unpack_key_format_checked(b, src).p;
+#else
+ return __bkey_unpack_pos(&b->format, src);
+#endif
+}
+
+static inline struct bpos bkey_unpack_pos(const struct btree *b,
+ const struct bkey_packed *src)
+{
+ return likely(bkey_packed(src))
+ ? bkey_unpack_pos_format_checked(b, src)
+ : packed_to_bkey_c(src)->p;
+}
+
+/* Disassembled bkeys */
+
+static inline struct bkey_s_c bkey_disassemble(struct btree *b,
+ const struct bkey_packed *k,
+ struct bkey *u)
+{
+ __bkey_unpack_key(b, u, k);
+
+ return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
+}
+
+/* non const version: */
+static inline struct bkey_s __bkey_disassemble(struct btree *b,
+ struct bkey_packed *k,
+ struct bkey *u)
+{
+ __bkey_unpack_key(b, u, k);
+
+ return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
+}
+
static inline u64 bkey_field_max(const struct bkey_format *f,
enum bch_bkey_fields nr)
{
diff --git a/libbcachefs/bkey_buf.h b/libbcachefs/bkey_buf.h
index 0d7c67a9..a30c4ae8 100644
--- a/libbcachefs/bkey_buf.h
+++ b/libbcachefs/bkey_buf.h
@@ -3,6 +3,7 @@
#define _BCACHEFS_BKEY_BUF_H
#include "bcachefs.h"
+#include "bkey.h"
struct bkey_buf {
struct bkey_i *k;
diff --git a/libbcachefs/bkey_cmp.h b/libbcachefs/bkey_cmp.h
new file mode 100644
index 00000000..5f42a6e6
--- /dev/null
+++ b/libbcachefs/bkey_cmp.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_CMP_H
+#define _BCACHEFS_BKEY_CMP_H
+
+#include "bkey.h"
+
+#ifdef CONFIG_X86_64
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+ unsigned nr_key_bits)
+{
+ long d0, d1, d2, d3;
+ int cmp;
+
+ /* we shouldn't need asm for this, but gcc is being retarded: */
+
+ asm(".intel_syntax noprefix;"
+ "xor eax, eax;"
+ "xor edx, edx;"
+ "1:;"
+ "mov r8, [rdi];"
+ "mov r9, [rsi];"
+ "sub ecx, 64;"
+ "jl 2f;"
+
+ "cmp r8, r9;"
+ "jnz 3f;"
+
+ "lea rdi, [rdi - 8];"
+ "lea rsi, [rsi - 8];"
+ "jmp 1b;"
+
+ "2:;"
+ "not ecx;"
+ "shr r8, 1;"
+ "shr r9, 1;"
+ "shr r8, cl;"
+ "shr r9, cl;"
+ "cmp r8, r9;"
+
+ "3:\n"
+ "seta al;"
+ "setb dl;"
+ "sub eax, edx;"
+ ".att_syntax prefix;"
+ : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
+ : "0" (l), "1" (r), "3" (nr_key_bits)
+ : "r8", "r9", "cc", "memory");
+
+ return cmp;
+}
+#else
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+ unsigned nr_key_bits)
+{
+ u64 l_v, r_v;
+
+ if (!nr_key_bits)
+ return 0;
+
+ /* for big endian, skip past header */
+ nr_key_bits += high_bit_offset;
+ l_v = *l & (~0ULL >> high_bit_offset);
+ r_v = *r & (~0ULL >> high_bit_offset);
+
+ while (1) {
+ if (nr_key_bits < 64) {
+ l_v >>= 64 - nr_key_bits;
+ r_v >>= 64 - nr_key_bits;
+ nr_key_bits = 0;
+ } else {
+ nr_key_bits -= 64;
+ }
+
+ if (!nr_key_bits || l_v != r_v)
+ break;
+
+ l = next_word(l);
+ r = next_word(r);
+
+ l_v = *l;
+ r_v = *r;
+ }
+
+ return cmp_int(l_v, r_v);
+}
+#endif
+
+static inline __pure __flatten
+int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
+ const struct bkey_packed *r,
+ const struct btree *b)
+{
+ const struct bkey_format *f = &b->format;
+ int ret;
+
+ EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+ ret = __bkey_cmp_bits(high_word(f, l),
+ high_word(f, r),
+ b->nr_key_bits);
+
+ EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
+ bkey_unpack_pos(b, r)));
+ return ret;
+}
+
+static inline __pure __flatten
+int bch2_bkey_cmp_packed_inlined(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
+{
+ struct bkey unpacked;
+
+ if (likely(bkey_packed(l) && bkey_packed(r)))
+ return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
+
+ if (bkey_packed(l)) {
+ __bkey_unpack_key_format_checked(b, &unpacked, l);
+ l = (void *) &unpacked;
+ } else if (bkey_packed(r)) {
+ __bkey_unpack_key_format_checked(b, &unpacked, r);
+ r = (void *) &unpacked;
+ }
+
+ return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+}
+
+#endif /* _BCACHEFS_BKEY_CMP_H */
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index e0cbac88..14d910a3 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -149,6 +149,7 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_inode)|
(1U << KEY_TYPE_inode_v2)|
+ (1U << KEY_TYPE_inode_v3)|
(1U << KEY_TYPE_inode_generation),
[BKEY_TYPE_dirents] =
(1U << KEY_TYPE_deleted)|
diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
index b1385a77..8518054a 100644
--- a/libbcachefs/bkey_sort.c
+++ b/libbcachefs/bkey_sort.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_buf.h"
+#include "bkey_cmp.h"
#include "bkey_sort.h"
#include "bset.h"
#include "extents.h"
@@ -155,7 +156,7 @@ static inline int sort_keys_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
- return bch2_bkey_cmp_packed(b, l, r) ?:
+ return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
(int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
(int) l->needs_whiteout - (int) r->needs_whiteout;
}
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index fa60ef84..09423536 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -965,7 +965,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
t->size -= j - l;
for (j = l; j < t->size; j++)
- rw_aux_tree(b, t)[j].offset += shift;
+ rw_aux_tree(b, t)[j].offset += shift;
EBUG_ON(l < t->size &&
rw_aux_tree(b, t)[l].offset ==
@@ -1266,7 +1266,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
bch2_btree_node_iter_sort(iter, b);
}
-noinline __flatten __attribute__((cold))
+noinline __flatten __cold
static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
struct btree *b, struct bpos *search)
{
@@ -1441,7 +1441,10 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
EBUG_ON(iter->data->k > iter->data->end);
if (unlikely(__btree_node_iter_set_end(iter, 0))) {
- bch2_btree_node_iter_set_drop(iter, iter->data);
+ /* avoid an expensive memmove call: */
+ iter->data[0] = iter->data[1];
+ iter->data[1] = iter->data[2];
+ iter->data[2] = (struct btree_node_iter_set) { 0, 0 };
return;
}
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 0d46534c..72e6376b 100644
--- a/libbcachefs/bset.h
+++ b/libbcachefs/bset.h
@@ -205,100 +205,6 @@ static inline size_t btree_aux_data_u64s(const struct btree *b)
return btree_aux_data_bytes(b) / sizeof(u64);
}
-typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
-
-static inline void
-__bkey_unpack_key_format_checked(const struct btree *b,
- struct bkey *dst,
- const struct bkey_packed *src)
-{
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
- {
- compiled_unpack_fn unpack_fn = b->aux_data;
- unpack_fn(dst, src);
-
- if (bch2_expensive_debug_checks) {
- struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
-
- BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
- }
- }
-#else
- *dst = __bch2_bkey_unpack_key(&b->format, src);
-#endif
-}
-
-static inline struct bkey
-bkey_unpack_key_format_checked(const struct btree *b,
- const struct bkey_packed *src)
-{
- struct bkey dst;
-
- __bkey_unpack_key_format_checked(b, &dst, src);
- return dst;
-}
-
-static inline void __bkey_unpack_key(const struct btree *b,
- struct bkey *dst,
- const struct bkey_packed *src)
-{
- if (likely(bkey_packed(src)))
- __bkey_unpack_key_format_checked(b, dst, src);
- else
- *dst = *packed_to_bkey_c(src);
-}
-
-/**
- * bkey_unpack_key -- unpack just the key, not the value
- */
-static inline struct bkey bkey_unpack_key(const struct btree *b,
- const struct bkey_packed *src)
-{
- return likely(bkey_packed(src))
- ? bkey_unpack_key_format_checked(b, src)
- : *packed_to_bkey_c(src);
-}
-
-static inline struct bpos
-bkey_unpack_pos_format_checked(const struct btree *b,
- const struct bkey_packed *src)
-{
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
- return bkey_unpack_key_format_checked(b, src).p;
-#else
- return __bkey_unpack_pos(&b->format, src);
-#endif
-}
-
-static inline struct bpos bkey_unpack_pos(const struct btree *b,
- const struct bkey_packed *src)
-{
- return likely(bkey_packed(src))
- ? bkey_unpack_pos_format_checked(b, src)
- : packed_to_bkey_c(src)->p;
-}
-
-/* Disassembled bkeys */
-
-static inline struct bkey_s_c bkey_disassemble(struct btree *b,
- const struct bkey_packed *k,
- struct bkey *u)
-{
- __bkey_unpack_key(b, u, k);
-
- return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
-}
-
-/* non const version: */
-static inline struct bkey_s __bkey_disassemble(struct btree *b,
- struct bkey_packed *k,
- struct bkey *u)
-{
- __bkey_unpack_key(b, u, k);
-
- return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
-}
-
#define for_each_bset(_b, _t) \
for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index f84b5086..8dd2db41 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -14,6 +14,12 @@
#include <linux/sched/mm.h>
#include <trace/events/bcachefs.h>
+#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
+do { \
+ if (shrinker_counter) \
+ bc->not_freed_##counter++; \
+} while (0)
+
const char * const bch2_btree_node_flags[] = {
#define x(f) #f,
BTREE_FLAGS()
@@ -175,7 +181,7 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
mutex_lock(&bc->lock);
ret = __bch2_btree_node_hash_insert(bc, b);
if (!ret)
- list_add(&b->list, &bc->live);
+ list_add_tail(&b->list, &bc->live);
mutex_unlock(&bc->lock);
return ret;
@@ -194,7 +200,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
* this version is for btree nodes that have already been freed (we're not
* reaping a real btree node)
*/
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
{
struct btree_cache *bc = &c->btree_cache;
int ret = 0;
@@ -204,38 +210,64 @@ wait_on_io:
if (b->flags & ((1U << BTREE_NODE_dirty)|
(1U << BTREE_NODE_read_in_flight)|
(1U << BTREE_NODE_write_in_flight))) {
- if (!flush)
+ if (!flush) {
+ if (btree_node_dirty(b))
+ BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+ else if (btree_node_read_in_flight(b))
+ BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+ else if (btree_node_write_in_flight(b))
+ BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
return -ENOMEM;
+ }
/* XXX: waiting on IO with btree cache lock held */
bch2_btree_node_wait_on_read(b);
bch2_btree_node_wait_on_write(b);
}
- if (!six_trylock_intent(&b->c.lock))
+ if (!six_trylock_intent(&b->c.lock)) {
+ BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
return -ENOMEM;
+ }
- if (!six_trylock_write(&b->c.lock))
+ if (!six_trylock_write(&b->c.lock)) {
+ BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
goto out_unlock_intent;
+ }
/* recheck under lock */
if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
(1U << BTREE_NODE_write_in_flight))) {
- if (!flush)
+ if (!flush) {
+ if (btree_node_read_in_flight(b))
+ BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+ else if (btree_node_write_in_flight(b))
+ BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
goto out_unlock;
+ }
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
goto wait_on_io;
}
- if (btree_node_noevict(b) ||
- btree_node_write_blocked(b) ||
- btree_node_will_make_reachable(b))
+ if (btree_node_noevict(b)) {
+ BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
+ goto out_unlock;
+ }
+ if (btree_node_write_blocked(b)) {
+ BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
goto out_unlock;
+ }
+ if (btree_node_will_make_reachable(b)) {
+ BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
+ goto out_unlock;
+ }
if (btree_node_dirty(b)) {
- if (!flush)
+ if (!flush) {
+ BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
goto out_unlock;
+ }
/*
* Using the underscore version because we don't want to compact
* bsets after the write, since this node is about to be evicted
@@ -263,14 +295,14 @@ out_unlock_intent:
goto out;
}
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
{
- return __btree_node_reclaim(c, b, false);
+ return __btree_node_reclaim(c, b, false, shrinker_counter);
}
static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{
- return __btree_node_reclaim(c, b, true);
+ return __btree_node_reclaim(c, b, true, false);
}
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -319,11 +351,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
if (touched >= nr)
goto out;
- if (!btree_node_reclaim(c, b)) {
+ if (!btree_node_reclaim(c, b, true)) {
btree_node_data_free(c, b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
freed++;
+ bc->freed++;
}
}
restart:
@@ -332,9 +365,11 @@ restart:
if (btree_node_accessed(b)) {
clear_btree_node_accessed(b);
- } else if (!btree_node_reclaim(c, b)) {
+ bc->not_freed_access_bit++;
+ } else if (!btree_node_reclaim(c, b, true)) {
freed++;
btree_node_data_free(c, b);
+ bc->freed++;
bch2_btree_node_hash_remove(bc, b);
six_unlock_write(&b->c.lock);
@@ -390,7 +425,7 @@ static void bch2_btree_cache_shrinker_to_text(struct printbuf *out, struct shrin
struct bch_fs *c = container_of(shrink, struct bch_fs,
btree_cache.shrink);
- bch2_btree_cache_to_text(out, c);
+ bch2_btree_cache_to_text(out, &c->btree_cache);
}
void bch2_fs_btree_cache_exit(struct bch_fs *c)
@@ -548,7 +583,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
struct btree *b;
list_for_each_entry_reverse(b, &bc->live, list)
- if (!btree_node_reclaim(c, b))
+ if (!btree_node_reclaim(c, b, false))
return b;
while (1) {
@@ -583,7 +618,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
* disk node. Check the freed list before allocating a new one:
*/
list_for_each_entry(b, freed, list)
- if (!btree_node_reclaim(c, b)) {
+ if (!btree_node_reclaim(c, b, false)) {
list_del_init(&b->list);
goto got_node;
}
@@ -609,7 +644,7 @@ got_node:
* the list. Check if there's any freed nodes there:
*/
list_for_each_entry(b2, &bc->freeable, list)
- if (!btree_node_reclaim(c, b2)) {
+ if (!btree_node_reclaim(c, b2, false)) {
swap(b->data, b2->data);
swap(b->aux_data, b2->aux_data);
btree_node_to_freedlist(bc, b2);
@@ -830,7 +865,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
if (likely(c->opts.btree_node_mem_ptr_optimization &&
b &&
b->hash_val == btree_ptr_hash_val(k)))
- goto lock_node;
+ goto lock_node;
retry:
b = btree_cache_find(bc, k);
if (unlikely(!b)) {
@@ -1070,7 +1105,7 @@ wait_on_io:
/* XXX we're called from btree_gc which will be holding other btree
* nodes locked
- * */
+ */
__bch2_btree_node_wait_on_read(b);
__bch2_btree_node_wait_on_write(b);
@@ -1141,9 +1176,21 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
stats.failed);
}
-void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c)
+void bch2_btree_cache_to_text(struct printbuf *out, struct btree_cache *bc)
{
- prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
- prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
- prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
+ prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
+ prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
+ prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
+
+ prt_printf(out, "freed:\t\t\t\t%u\n", bc->freed);
+ prt_printf(out, "not freed, dirty:\t\t%u\n", bc->not_freed_dirty);
+ prt_printf(out, "not freed, write in flight:\t%u\n", bc->not_freed_write_in_flight);
+ prt_printf(out, "not freed, read in flight:\t%u\n", bc->not_freed_read_in_flight);
+ prt_printf(out, "not freed, lock intent failed:\t%u\n", bc->not_freed_lock_intent);
+ prt_printf(out, "not freed, lock write failed:\t%u\n", bc->not_freed_lock_write);
+ prt_printf(out, "not freed, access bit:\t\t%u\n", bc->not_freed_access_bit);
+ prt_printf(out, "not freed, no evict failed:\t%u\n", bc->not_freed_noevict);
+ prt_printf(out, "not freed, write blocked:\t%u\n", bc->not_freed_write_blocked);
+ prt_printf(out, "not freed, will make reachable:\t%u\n", bc->not_freed_will_make_reachable);
+
}
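
BTREE_CACHE_NOT_FREED_INCREMENT() leans on ## token pasting so that each call site names one of the new btree_cache counters directly; only the shrinker path (shrinker_counter == true) pays for the accounting. Expansion sketch:

	/* BTREE_CACHE_NOT_FREED_INCREMENT(dirty); expands to: */
	do {
		if (shrinker_counter)
			bc->not_freed_dirty++;
	} while (0);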
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index a4df3e86..b623c702 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -4,6 +4,7 @@
#include "bcachefs.h"
#include "btree_types.h"
+#include "bkey_methods.h"
extern const char * const bch2_btree_node_flags[];
@@ -100,6 +101,6 @@ static inline unsigned btree_blocks(struct bch_fs *c)
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *,
struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, struct bch_fs *);
+void bch2_btree_cache_to_text(struct printbuf *, struct btree_cache *);
#endif /* _BCACHEFS_BTREE_CACHE_H */
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index a4d6998f..801a09f6 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -318,7 +318,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
" node %s",
bch2_btree_ids[b->c.btree_id], b->c.level,
buf1.buf, buf2.buf))
- ret = set_node_min(c, cur, expected_start);
+ ret = set_node_min(c, cur, expected_start);
}
out:
fsck_err:
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 2aa56555..d18346a5 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -22,6 +22,8 @@
static void btree_trans_verify_sorted(struct btree_trans *);
inline void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);
+static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *,
+ struct btree_path *, int);
static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
@@ -1004,14 +1006,9 @@ err:
return ret;
}
-static inline bool btree_path_good_node(struct btree_trans *trans,
- struct btree_path *path,
- unsigned l, int check_pos)
+static inline bool btree_path_check_pos_in_node(struct btree_path *path,
+ unsigned l, int check_pos)
{
- if (!is_btree_node(path, l) ||
- !bch2_btree_node_relock(trans, path, l))
- return false;
-
if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
return false;
if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
@@ -1019,6 +1016,15 @@ static inline bool btree_path_good_node(struct btree_trans *trans,
return true;
}
+static inline bool btree_path_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned l, int check_pos)
+{
+ return is_btree_node(path, l) &&
+ bch2_btree_node_relock(trans, path, l) &&
+ btree_path_check_pos_in_node(path, l, check_pos);
+}
+
static void btree_path_set_level_down(struct btree_trans *trans,
struct btree_path *path,
unsigned new_level)
@@ -1035,9 +1041,9 @@ static void btree_path_set_level_down(struct btree_trans *trans,
bch2_btree_path_verify(trans, path);
}
-static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
- struct btree_path *path,
- int check_pos)
+static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ int check_pos)
{
unsigned i, l = path->level;
again:
@@ -1058,6 +1064,16 @@ again:
return l;
}
+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ int check_pos)
+{
+ return likely(btree_node_locked(path, path->level) &&
+ btree_path_check_pos_in_node(path, path->level, check_pos))
+ ? path->level
+ : __btree_path_up_until_good_node(trans, path, check_pos);
+}
+
/*
* This is the main state machine for walking down the btree - walks down to a
* specified depth
@@ -1158,17 +1174,21 @@ static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
struct btree_path *src)
{
unsigned i, offset = offsetof(struct btree_path, pos);
+ int cmp = btree_path_cmp(dst, src);
memcpy((void *) dst + offset,
(void *) src + offset,
sizeof(struct btree_path) - offset);
- for (i = 0; i < BTREE_MAX_DEPTH; i++)
- if (btree_node_locked(dst, i))
- six_lock_increment(&dst->l[i].b->c.lock,
- __btree_lock_want(dst, i));
+ for (i = 0; i < BTREE_MAX_DEPTH; i++) {
+ unsigned t = btree_node_locked_type(dst, i);
- bch2_btree_path_check_sort(trans, dst, 0);
+ if (t != BTREE_NODE_UNLOCKED)
+ six_lock_increment(&dst->l[i].b->c.lock, t);
+ }
+
+ if (cmp)
+ bch2_btree_path_check_sort_fast(trans, dst, cmp);
}
static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
@@ -1181,8 +1201,7 @@ static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btr
return new;
}
-inline struct btree_path * __must_check
-bch2_btree_path_make_mut(struct btree_trans *trans,
+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
struct btree_path *path, bool intent,
unsigned long ip)
{
@@ -1218,7 +1237,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
path->pos = new_pos;
- bch2_btree_path_check_sort(trans, path, cmp);
+ bch2_btree_path_check_sort_fast(trans, path, cmp);
if (unlikely(path->cached)) {
btree_node_unlock(trans, path, 0);
@@ -1242,7 +1261,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
__btree_path_level_init(path, l);
}
- if (l != path->level) {
+ if (unlikely(l != path->level)) {
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
__bch2_btree_path_unlock(trans, path);
}
@@ -2518,6 +2537,25 @@ static inline void btree_path_swap(struct btree_trans *trans,
btree_path_verify_sorted_ref(trans, r);
}
+static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *trans,
+ struct btree_path *path,
+ int cmp)
+{
+ struct btree_path *n;
+ int cmp2;
+
+ EBUG_ON(!cmp);
+
+ while ((n = cmp < 0
+ ? prev_btree_path(trans, path)
+ : next_btree_path(trans, path)) &&
+ (cmp2 = btree_path_cmp(n, path)) &&
+ cmp2 != cmp)
+ btree_path_swap(trans, n, path);
+
+ btree_trans_verify_sorted(trans);
+}
+
inline void bch2_btree_path_check_sort(struct btree_trans *trans, struct btree_path *path,
int cmp)
{
@@ -2612,7 +2650,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
unsigned flags,
unsigned long ip)
{
- if (trans->restarted)
+ if (unlikely(trans->restarted))
panic("bch2_trans_iter_init(): in transaction restart, %s by %pS\n",
bch2_err_str(trans->restarted),
(void *) trans->last_restarted_ip);
@@ -2632,7 +2670,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
btree_type_has_snapshots(btree_id))
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
- if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags))
+ if (trans->journal_replay_not_finished)
flags |= BTREE_ITER_WITH_JOURNAL;
iter->trans = trans;
@@ -2816,7 +2854,7 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
BUG_ON(trans->used_mempool);
#ifdef __KERNEL__
- p = this_cpu_xchg(c->btree_paths_bufs->path , NULL);
+ p = this_cpu_xchg(c->btree_paths_bufs->path, NULL);
#endif
if (!p)
p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
@@ -2825,15 +2863,16 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
trans->updates = p; p += updates_bytes;
}
-static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c,
- const char *fn)
+const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+
+unsigned bch2_trans_get_fn_idx(const char *fn)
{
unsigned i;
- for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++)
- if (!c->btree_transaction_fns[i] ||
- c->btree_transaction_fns[i] == fn) {
- c->btree_transaction_fns[i] = fn;
+ for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
+ if (!bch2_btree_transaction_fns[i] ||
+ bch2_btree_transaction_fns[i] == fn) {
+ bch2_btree_transaction_fns[i] = fn;
return i;
}
@@ -2841,7 +2880,7 @@ static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct b
return i;
}
-void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *fn)
+void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_idx)
__acquires(&c->btree_trans_barrier)
{
struct btree_transaction_stats *s;
@@ -2851,10 +2890,13 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
memset(trans, 0, sizeof(*trans));
trans->c = c;
- trans->fn = fn;
+ trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
+ ? bch2_btree_transaction_fns[fn_idx] : NULL;
trans->last_begin_time = local_clock();
- trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn);
+ trans->fn_idx = fn_idx;
trans->locking_wait.task = current;
+ trans->journal_replay_not_finished =
+ !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
closure_init_stack(&trans->ref);
bch2_trans_alloc_paths(trans, c);
@@ -2979,7 +3021,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
rcu_read_lock();
owner = READ_ONCE(b->lock.owner);
- pid = owner ? owner->pid : 0;;
+ pid = owner ? owner->pid : 0;
rcu_read_unlock();
prt_tab(out);
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 910f6d7b..0775cfa2 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -131,9 +131,20 @@ __trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
_path = __trans_next_path_with_node((_trans), (_b), \
(_path)->idx + 1))
-struct btree_path * __must_check
-bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
bool, unsigned long);
+
+static inline struct btree_path * __must_check
+bch2_btree_path_make_mut(struct btree_trans *trans,
+ struct btree_path *path, bool intent,
+ unsigned long ip)
+{
+ if (path->ref > 1 || path->preserve)
+ path = __bch2_btree_path_make_mut(trans, path, intent, ip);
+ path->should_be_locked = false;
+ return path;
+}
+
struct btree_path * __must_check
bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *,
struct bpos, bool, unsigned long);
@@ -551,10 +562,21 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
-void __bch2_trans_init(struct btree_trans *, struct bch_fs *, const char *);
+void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned);
void bch2_trans_exit(struct btree_trans *);
-#define bch2_trans_init(_trans, _c, _nr_iters, _mem) __bch2_trans_init(_trans, _c, __func__)
+extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+unsigned bch2_trans_get_fn_idx(const char *);
+
+#define bch2_trans_init(_trans, _c, _nr_iters, _mem) \
+do { \
+ static unsigned trans_fn_idx; \
+ \
+ if (unlikely(!trans_fn_idx)) \
+ trans_fn_idx = bch2_trans_get_fn_idx(__func__); \
+ \
+ __bch2_trans_init(_trans, _c, trans_fn_idx); \
+} while (0)
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
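
The new bch2_trans_init() caches the result of bch2_trans_get_fn_idx() in a function-local static, one per expansion site, so the linear scan of bch2_btree_transaction_fns[] runs once per call site instead of once per transaction. The idiom in isolation, with hypothetical names:

	/* each expansion of the macro gets its own static slot: */
	#define register_once(_name)					\
	({								\
		static unsigned __idx;					\
		if (unlikely(!__idx))					\
			__idx = lookup_or_add_fn(_name);		\
		__idx;							\
	})

One quirk carried over from the real macro: index 0 doubles as "not yet looked up", so the first function ever registered redoes the (idempotent) scan on every call. That is harmless here, since the scan is short and always lands on the same slot.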
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index b8ed25b9..cd52dd5a 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_cache.h"
@@ -103,6 +104,22 @@ static void bkey_cached_free(struct btree_key_cache *bc,
six_unlock_intent(&ck->c.lock);
}
+static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
+{
+ struct bkey_cached *pos;
+
+ list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
+ if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
+ pos->btree_trans_barrier_seq)) {
+ list_move(&ck->list, &pos->list);
+ return;
+ }
+ }
+
+ list_move(&ck->list, &bc->freed_nonpcpu);
+}
+
static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
struct bkey_cached *ck)
{
@@ -130,11 +147,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
while (f->nr > ARRAY_SIZE(f->objs) / 2) {
struct bkey_cached *ck2 = f->objs[--f->nr];
- list_move_tail(&ck2->list, &bc->freed_nonpcpu);
+ __bkey_cached_move_to_freelist_ordered(bc, ck2);
}
preempt_enable();
- list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ __bkey_cached_move_to_freelist_ordered(bc, ck);
mutex_unlock(&bc->lock);
}
#else
@@ -295,7 +312,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
bool was_new = true;
ck = bkey_cached_alloc(trans, path);
- if (unlikely(IS_ERR(ck)))
+ if (IS_ERR(ck))
return ck;
if (unlikely(!ck)) {
@@ -416,7 +433,7 @@ err:
return ret;
}
-noinline static int
+static noinline int
bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
unsigned flags)
{
@@ -597,7 +614,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
* Since journal reclaim depends on us making progress here, and the
* allocator/copygc depend on journal reclaim making progress, we need
* to be using alloc reserves:
- * */
+ */
ret = bch2_btree_iter_traverse(&b_iter) ?:
bch2_trans_update(trans, &b_iter, ck->k,
BTREE_UPDATE_KEY_CACHE_RECLAIM|
@@ -982,7 +999,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
bc->table_init_done = true;
- bc->shrink.seeks = 1;
+ bc->shrink.seeks = 0;
bc->shrink.count_objects = bch2_btree_key_cache_count;
bc->shrink.scan_objects = bch2_btree_key_cache_scan;
bc->shrink.to_text = bch2_btree_key_cache_shrinker_to_text;
@@ -991,15 +1008,17 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
{
- prt_printf(out, "nr_freed:\t%zu\n", atomic_long_read(&c->nr_freed));
- prt_printf(out, "nr_keys:\t%lu\n", atomic_long_read(&c->nr_keys));
- prt_printf(out, "nr_dirty:\t%lu\n", atomic_long_read(&c->nr_dirty));
+ prt_printf(out, "nr_freed:\t%zu", atomic_long_read(&c->nr_freed));
+ prt_newline(out);
+ prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys));
+ prt_newline(out);
+ prt_printf(out, "nr_dirty:\t%lu", atomic_long_read(&c->nr_dirty));
+ prt_newline(out);
}
void bch2_btree_key_cache_exit(void)
{
- if (bch2_key_cache)
- kmem_cache_destroy(bch2_key_cache);
+ kmem_cache_destroy(bch2_key_cache);
}
int __init bch2_btree_key_cache_init(void)
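
__bkey_cached_move_to_freelist_ordered() keeps the nonpcpu freelist sorted by btree_trans_barrier_seq via ULONG_CMP_GE(). Assuming that macro follows the kernel's usual wraparound-safe shape (compare through a signed view of the difference), the ordering stays correct even when the sequence counter wraps:

	/* assumed definition; the subtraction wraps modulo 2^BITS_PER_LONG
	 * and the signed cast turns "a is at or ahead of b" into >= 0: */
	#define ULONG_CMP_GE(a, b)	(((long) ((a) - (b))) >= 0)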
diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index 93a6ebed..9d090437 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -274,7 +274,7 @@ next:
b = &READ_ONCE(path->l[top->level].b)->c;
- if (unlikely(IS_ERR_OR_NULL(b))) {
+ if (IS_ERR_OR_NULL(b)) {
BUG_ON(!lock_graph_remove_non_waiters(&g));
goto next;
}
@@ -605,7 +605,7 @@ int bch2_trans_relock(struct btree_trans *trans)
struct btree_path *path;
if (unlikely(trans->restarted))
- return - ((int) trans->restarted);
+ return -((int) trans->restarted);
trans_for_each_path(trans, path)
if (path->should_be_locked &&
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index af226eed..892d1231 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -6,7 +6,7 @@
#include <linux/rhashtable.h>
#include <linux/six.h>
-#include "bkey_methods.h"
+//#include "bkey_methods.h"
#include "buckets_types.h"
#include "darray.h"
#include "journal_types.h"
@@ -160,6 +160,16 @@ struct btree_cache {
/* Number of elements in live + freeable lists */
unsigned used;
unsigned reserve;
+ unsigned freed;
+ unsigned not_freed_lock_intent;
+ unsigned not_freed_lock_write;
+ unsigned not_freed_dirty;
+ unsigned not_freed_read_in_flight;
+ unsigned not_freed_write_in_flight;
+ unsigned not_freed_noevict;
+ unsigned not_freed_write_blocked;
+ unsigned not_freed_will_make_reachable;
+ unsigned not_freed_access_bit;
atomic_t dirty;
struct shrinker shrink;
@@ -408,6 +418,7 @@ struct btree_trans {
bool in_traverse_all:1;
bool memory_allocation_failure:1;
bool is_initial_gc:1;
+ bool journal_replay_not_finished:1;
enum bch_errcode restarted:16;
u32 restart_count;
unsigned long last_restarted_ip;
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 03c4fd09..40debf75 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -2046,7 +2046,7 @@ static int async_btree_node_rewrite_trans(struct btree_trans *trans,
goto out;
ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-out :
+out:
bch2_trans_iter_exit(trans, &iter);
return ret;
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index b166ab4b..3a683820 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -339,7 +339,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
- unsigned old_u64s = ck->u64s, new_u64s;
+ unsigned new_u64s;
struct bkey_i *new_k;
EBUG_ON(path->level);
@@ -368,12 +368,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
ck->u64s = new_u64s;
ck->k = new_k;
- /*
- * Keys returned by peek() are no longer valid pointers, so we need a
- * transaction restart:
- */
- trace_and_count(c, trans_restart_key_cache_key_realloced, trans, _RET_IP_, path, old_u64s, new_u64s);
- return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
+ return 0;
}
/* Triggers: */
@@ -1388,11 +1383,41 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
static int __must_check
bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
struct bkey_i *k, enum btree_update_flags flags,
+ unsigned long ip);
+
+static noinline int flush_new_cached_update(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree_insert_entry *i,
+ enum btree_update_flags flags,
+ unsigned long ip)
+{
+ struct btree_path *btree_path;
+ int ret;
+
+ i->key_cache_already_flushed = true;
+ i->flags |= BTREE_TRIGGER_NORUN;
+
+ btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+ BTREE_ITER_INTENT, _THIS_IP_);
+
+ ret = bch2_btree_path_traverse(trans, btree_path, 0);
+ if (ret)
+ goto err;
+
+ btree_path_set_should_be_locked(btree_path);
+ ret = bch2_trans_update_by_path_trace(trans, btree_path, i->k, flags, ip);
+err:
+ bch2_path_put(trans, btree_path, true);
+ return ret;
+}
+
+static int __must_check
+bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
+ struct bkey_i *k, enum btree_update_flags flags,
unsigned long ip)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i, n;
- int ret = 0;
BUG_ON(!path->should_be_locked);
@@ -1461,27 +1486,10 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
* the key cache - but the key has to exist in the btree for that to
* work:
*/
- if (path->cached &&
- bkey_deleted(&i->old_k)) {
- struct btree_path *btree_path;
-
- i->key_cache_already_flushed = true;
- i->flags |= BTREE_TRIGGER_NORUN;
+ if (unlikely(path->cached && bkey_deleted(&i->old_k)))
+ return flush_new_cached_update(trans, path, i, flags, ip);
- btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
- BTREE_ITER_INTENT, _THIS_IP_);
-
- ret = bch2_btree_path_traverse(trans, btree_path, 0);
- if (ret)
- goto err;
-
- btree_path_set_should_be_locked(btree_path);
- ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
-err:
- bch2_path_put(trans, btree_path, true);
- }
-
- return ret;
+ return 0;
}
static int __must_check
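The refactor above hoists the rare "new key in the key cache" case out of bch2_trans_update_by_path_trace() into a noinline helper, with a forward declaration so the helper can recurse back in on the btree path; note the helper now passes i->k, the entry's key, where the old inline code passed the caller's k. A compilable sketch of the pattern, with invented names:

    /* Sketch of the cold-path split: the unlikely case moves into a
     * noinline helper, and a forward declaration lets it call back
     * into the hot function. Names are invented. */
    #include <stdio.h>

    static int update_by_path(int cached, int key);     /* forward decl */

    static __attribute__((noinline))
    int flush_new_cached_update(int key)
    {
            /* redirect the update to the underlying btree path */
            return update_by_path(0 /* !cached */, key);
    }

    static int update_by_path(int cached, int key)
    {
            if (__builtin_expect(cached, 0))            /* unlikely() */
                    return flush_new_cached_update(key);
            return key;                                 /* common case */
    }

    int main(void)
    {
            printf("%d\n", update_by_path(1, 42));      /* prints 42 */
            return 0;
    }

Keeping the cold path out of line keeps the hot function small enough to stay in the instruction cache, which is the point of the change.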
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c611931f..116711fc 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -89,20 +89,17 @@ static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
: ca->usage[journal_seq & JOURNAL_BUF_MASK]);
}
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
{
struct bch_fs *c = ca->fs;
- struct bch_dev_usage ret;
unsigned seq, i, u64s = dev_usage_u64s();
do {
seq = read_seqcount_begin(&c->usage_lock);
- memcpy(&ret, ca->usage_base, u64s * sizeof(u64));
+ memcpy(usage, ca->usage_base, u64s * sizeof(u64));
for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
- acc_u64s_percpu((u64 *) &ret, (u64 __percpu *) ca->usage[i], u64s);
+ acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
} while (read_seqcount_retry(&c->usage_lock, seq));
-
- return ret;
}
static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
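bch2_dev_usage_read_fast() snapshots per-device usage under a seqcount retry loop and writes into a caller-supplied struct, avoiding the by-value return of the old bch2_dev_usage_read() (which the buckets.h hunk below re-implements as an inline wrapper). A simplified userspace analog of the retry loop, using C11 atomics rather than the kernel's seqcount API:

    /* Analog sketch: a torn copy is caught and retried when the
     * sequence number changed (or was odd, i.e. a writer was active). */
    #include <stdatomic.h>
    #include <stdio.h>
    #include <string.h>

    struct dev_usage { unsigned long buckets, sectors; };

    static _Atomic unsigned usage_seq;  /* even: stable, odd: writer active */
    static struct dev_usage usage_base;

    static void dev_usage_read_fast(struct dev_usage *out)
    {
            unsigned seq;

            do {
                    seq = atomic_load_explicit(&usage_seq, memory_order_acquire);
                    memcpy(out, &usage_base, sizeof(*out));
            } while ((seq & 1) ||
                     atomic_load_explicit(&usage_seq, memory_order_acquire) != seq);
    }

    int main(void)
    {
            struct dev_usage u;

            dev_usage_read_fast(&u);
            printf("%lu buckets, %lu sectors\n", u.buckets, u.sectors);
            return 0;
    }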
@@ -923,7 +920,7 @@ int bch2_mark_extent(struct btree_trans *trans,
{
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
- struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+ struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -1115,10 +1112,10 @@ int bch2_mark_inode(struct btree_trans *trans,
u64 journal_seq = trans->journal_res.seq;
if (flags & BTREE_TRIGGER_INSERT) {
- struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
+ struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
BUG_ON(!journal_seq);
- BUG_ON(new.k->type != KEY_TYPE_inode_v2);
+ BUG_ON(new.k->type != KEY_TYPE_inode_v3);
v->bi_journal_seq = cpu_to_le64(journal_seq);
}
@@ -1142,7 +1139,7 @@ int bch2_mark_reservation(struct btree_trans *trans,
unsigned flags)
{
struct bch_fs *c = trans->c;
- struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+ struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
struct bch_fs_usage __percpu *fs_usage;
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
s64 sectors = (s64) k.k->size;
@@ -1221,7 +1218,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
unsigned flags)
{
struct bch_fs *c = trans->c;
- struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+ struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
struct reflink_gc *ref;
size_t l, r, m;
@@ -2113,5 +2110,5 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
return -ENOMEM;
}
- return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);;
+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
}
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 6881502d..56c06ccd 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -139,7 +139,15 @@ static inline u8 ptr_stale(struct bch_dev *ca,
/* Device usage: */
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
+void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *);
+static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+{
+ struct bch_dev_usage ret;
+
+ bch2_dev_usage_read_fast(ca, &ret);
+ return ret;
+}
+
void bch2_dev_usage_init(struct bch_dev *);
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve)
@@ -240,8 +248,6 @@ int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct
int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index b5850a76..3268e8d4 100644
--- a/libbcachefs/checksum.c
+++ b/libbcachefs/checksum.c
@@ -131,7 +131,7 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
size_t orig_len = len;
int ret, i;
- sg = kmalloc_array(sizeof(*sg), pages, GFP_KERNEL);
+ sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL);
if (!sg)
return -ENOMEM;
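The fix above corrects swapped kmalloc_array() arguments: the signature is kmalloc_array(nmemb, size, flags), and keeping the element count first is what lets the helper overflow-check the multiplication (the journal.c, journal_sb.c, fs-io.c, and fsck.c hunks below make the same cleanup). A userspace sketch of why the checked multiply matters:

    /* Sketch of an *_array allocator: n * size is overflow-checked
     * before allocating, instead of silently wrapping. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static void *malloc_array(size_t nmemb, size_t size)
    {
            size_t bytes;

            if (__builtin_mul_overflow(nmemb, size, &bytes))
                    return NULL;            /* n * size would overflow */
            return malloc(bytes);
    }

    int main(void)
    {
            void *p = malloc_array(SIZE_MAX, 2);    /* overflow: NULL */

            printf("%p\n", p);
            free(p);
            return 0;
    }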
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index f692f35a..2b7080b6 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -377,7 +377,7 @@ static unsigned __bio_compress(struct bch_fs *c,
/* If it's only one block, don't bother trying to compress: */
if (src->bi_iter.bi_size <= c->opts.block_size)
- return 0;
+ return BCH_COMPRESSION_TYPE_incompressible;
dst_data = bio_map_or_bounce(c, dst, WRITE);
src_data = bio_map_or_bounce(c, src, READ);
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 5ef35e3b..b75ff07e 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -312,7 +312,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
bch2_write_op_init(&m->op, c, io_opts);
m->op.pos = bkey_start_pos(k.k);
m->op.version = k.k->version;
- m->op.target = data_opts.target,
+ m->op.target = data_opts.target;
m->op.write_point = wp;
m->op.flags |= BCH_WRITE_PAGES_STABLE|
BCH_WRITE_PAGES_OWNED|
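The one-character fix above replaces a trailing comma with a semicolon; with the comma, the target and write_point assignments formed a single comma-expression. That happened to behave identically here, but the comma operator can silently change meaning elsewhere:

    /* Why a stray comma is dangerous: the comma operator evaluates
     * both operands and yields the right-hand value. */
    #include <stdio.h>

    int main(void)
    {
            int a = (1, 2);         /* comma operator: a == 2, not 1 */
            int b;

            b = 3, b += 1;          /* legal one-expression sequence */
            printf("%d %d\n", a, b);        /* prints "2 4" */
            return 0;
    }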
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index d87131f5..57602c8e 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -477,7 +477,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
if (i->iter < tbl->size) {
rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
bch2_cached_btree_node_to_text(&i->buf, c, b);
- i->iter++;;
+ i->iter++;
} else {
done = true;
}
@@ -637,11 +637,11 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
if (!i->size)
break;
- if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) ||
- !c->btree_transaction_fns[i->iter])
+ if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
+ !bch2_btree_transaction_fns[i->iter])
break;
- prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]);
+ prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]);
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2);
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 4d942d22..288f46b5 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -103,7 +103,7 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) {
prt_printf(err, "value too big (%zu > %u)",
- bkey_val_u64s(k.k),dirent_val_u64s(len));
+ bkey_val_u64s(k.k), dirent_val_u64s(len));
return -EINVAL;
}
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 2ca13014..9e2a4ed4 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -292,7 +292,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
lp.crc.uncompressed_size) {
/* can use left extent's crc entry */
- } else if (lp.crc.live_size <= rp.crc.offset ) {
+ } else if (lp.crc.live_size <= rp.crc.offset) {
/* can use right extent's crc entry */
} else {
/* check if checksums can be merged: */
@@ -351,7 +351,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
crc_l.uncompressed_size) {
/* can use left extent's crc entry */
- } else if (crc_l.live_size <= crc_r.offset ) {
+ } else if (crc_l.live_size <= crc_r.offset) {
/* can use right extent's crc entry */
crc_r.offset -= crc_l.live_size;
bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
index e9dd1d13..1f2e1fc4 100644
--- a/libbcachefs/fs-common.c
+++ b/libbcachefs/fs-common.c
@@ -487,11 +487,11 @@ int bch2_rename_trans(struct btree_trans *trans,
ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
(src_dir.inum != dst_dir.inum
? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
- : 0 ) ?:
+ : 0) ?:
bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
(dst_inum.inum
? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
- : 0 );
+ : 0);
err:
bch2_trans_iter_exit(trans, &dst_inode_iter);
bch2_trans_iter_exit(trans, &src_inode_iter);
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 74292063..706180b9 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -1684,7 +1684,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
unsigned pg_len = min_t(unsigned, len - copied,
PAGE_SIZE - pg_offset);
unsigned pg_copied = copy_page_from_iter_atomic(page,
- pg_offset, pg_len,iter);
+ pg_offset, pg_len, iter);
if (!pg_copied)
break;
@@ -2137,8 +2137,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
struct iovec *iov = dio->inline_vecs;
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
- iov = kmalloc(dio->iter.nr_segs * sizeof(*iov),
- GFP_KERNEL);
+ iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
+ GFP_KERNEL);
if (unlikely(!iov)) {
dio->sync = sync = true;
goto do_io;
@@ -2713,7 +2713,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
truncate_pagecache_range(&inode->v, offset, end - 1);
- if (block_start < block_end ) {
+ if (block_start < block_end) {
s64 i_sectors_delta = 0;
ret = bch2_fpunch(c, inode_inum(inode),
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index bf82737d..186faa54 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -528,7 +528,7 @@ static int bch2_symlink(struct user_namespace *mnt_userns,
inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
- if (unlikely(IS_ERR(inode)))
+ if (IS_ERR(inode))
return bch2_err_class(PTR_ERR(inode));
inode_lock(&inode->v);
@@ -1846,7 +1846,7 @@ got_sb:
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
c->vfs_sb = sb;
- strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
+ strscpy(sb->s_id, c->name, sizeof(sb->s_id));
ret = super_setup_bdi(sb);
if (ret)
@@ -1917,8 +1917,7 @@ MODULE_ALIAS_FS("bcachefs");
void bch2_vfs_exit(void)
{
unregister_filesystem(&bcache_fs_type);
- if (bch2_inode_cache)
- kmem_cache_destroy(bch2_inode_cache);
+ kmem_cache_destroy(bch2_inode_cache);
}
int __init bch2_vfs_init(void)
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 12f2ef44..ca95d85b 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -2044,7 +2044,8 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t,
{
if (t->nr == t->size) {
size_t new_size = max_t(size_t, 128UL, t->size * 2);
- void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
+ void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
+
if (!d) {
bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
new_size);
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 1f2782fc..1a0d2608 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -60,11 +60,10 @@ static int inode_decode_field(const u8 *in, const u8 *end,
return bytes;
}
-void bch2_inode_pack(struct bch_fs *c,
- struct bkey_inode_buf *packed,
- const struct bch_inode_unpacked *inode)
+static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
+ const struct bch_inode_unpacked *inode)
{
- struct bkey_i_inode_v2 *k = &packed->inode;
+ struct bkey_i_inode_v3 *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out;
@@ -72,13 +71,17 @@ void bch2_inode_pack(struct bch_fs *c,
unsigned bytes;
int ret;
- bkey_inode_v2_init(&packed->inode.k_i);
+ bkey_inode_v3_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq);
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
- packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
- packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
+ packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors);
+ packed->inode.v.bi_size = cpu_to_le64(inode->bi_size);
+ packed->inode.v.bi_version = cpu_to_le64(inode->bi_version);
+ SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
+ SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
+
#define x(_name, _bits) \
nr_fields++; \
@@ -99,7 +102,7 @@ void bch2_inode_pack(struct bch_fs *c,
*out++ = 0; \
}
- BCH_INODE_FIELDS()
+ BCH_INODE_FIELDS_v3()
#undef x
BUG_ON(out > end);
@@ -110,7 +113,7 @@ void bch2_inode_pack(struct bch_fs *c,
set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes);
- SET_INODEv2_NR_FIELDS(&k->v, nr_fields);
+ SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked;
@@ -120,16 +123,25 @@ void bch2_inode_pack(struct bch_fs *c,
BUG_ON(ret);
BUG_ON(unpacked.bi_inum != inode->bi_inum);
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
+ BUG_ON(unpacked.bi_sectors != inode->bi_sectors);
+ BUG_ON(unpacked.bi_size != inode->bi_size);
+ BUG_ON(unpacked.bi_version != inode->bi_version);
BUG_ON(unpacked.bi_mode != inode->bi_mode);
#define x(_name, _bits) if (unpacked._name != inode->_name) \
panic("unpacked %llu should be %llu", \
(u64) unpacked._name, (u64) inode->_name);
- BCH_INODE_FIELDS()
+ BCH_INODE_FIELDS_v3()
#undef x
}
}
+void bch2_inode_pack(struct bkey_inode_buf *packed,
+ const struct bch_inode_unpacked *inode)
+{
+ bch2_inode_pack_inlined(packed, inode);
+}
+
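bch2_inode_pack_inlined() and the unpack paths are all generated from the BCH_INODE_FIELDS_v3() x-macro, so the packer, the unpackers, and the CONFIG_BCACHEFS_DEBUG cross-check expand from a single field list and cannot drift apart. A minimal, compilable illustration of the technique with made-up fields:

    /* X-macro sketch: one field list generates both the struct and
     * code that walks its fields. Field names are illustrative. */
    #include <stdio.h>

    #define FIELDS()        \
            x(atime, 64)    \
            x(mtime, 64)

    struct inode_u {
    #define x(_name, _bits) unsigned long long _name;
            FIELDS()
    #undef x
    };

    int main(void)
    {
            struct inode_u u = { .atime = 1, .mtime = 2 };

    #define x(_name, _bits) printf(#_name " = %llu\n", u._name);
            FIELDS()
    #undef x
            return 0;
    }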
static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked)
{
@@ -157,7 +169,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
unpacked->_name = field[1]; \
in += ret;
- BCH_INODE_FIELDS()
+ BCH_INODE_FIELDS_v2()
#undef x
/* XXX: signal if there were more fields than expected? */
@@ -196,15 +208,66 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
return -1; \
fieldnr++;
- BCH_INODE_FIELDS()
+ BCH_INODE_FIELDS_v2()
#undef x
/* XXX: signal if there were more fields than expected? */
return 0;
}
-int bch2_inode_unpack(struct bkey_s_c k,
- struct bch_inode_unpacked *unpacked)
+static int bch2_inode_unpack_v3(struct bkey_s_c k,
+ struct bch_inode_unpacked *unpacked)
+{
+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+ const u8 *in = inode.v->fields;
+ const u8 *end = bkey_val_end(inode);
+ unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
+ unsigned fieldnr = 0;
+ int ret;
+ u64 v[2];
+
+ unpacked->bi_inum = inode.k->p.offset;
+	unpacked->bi_journal_seq = le64_to_cpu(inode.v->bi_journal_seq);
+ unpacked->bi_hash_seed = inode.v->bi_hash_seed;
+ unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
+ unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors);
+ unpacked->bi_size = le64_to_cpu(inode.v->bi_size);
+ unpacked->bi_version = le64_to_cpu(inode.v->bi_version);
+ unpacked->bi_mode = INODEv3_MODE(inode.v);
+
+#define x(_name, _bits) \
+ if (fieldnr < nr_fields) { \
+ ret = bch2_varint_decode_fast(in, end, &v[0]); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ \
+ if (_bits > 64) { \
+ ret = bch2_varint_decode_fast(in, end, &v[1]); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ } else { \
+ v[1] = 0; \
+ } \
+ } else { \
+ v[0] = v[1] = 0; \
+ } \
+ \
+ unpacked->_name = v[0]; \
+ if (v[1] || v[0] != unpacked->_name) \
+ return -1; \
+ fieldnr++;
+
+ BCH_INODE_FIELDS_v3()
+#undef x
+
+ /* XXX: signal if there were more fields than expected? */
+ return 0;
+}
+
+static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
+ struct bch_inode_unpacked *unpacked)
{
switch (k.k->type) {
case KEY_TYPE_inode: {
@@ -243,6 +306,14 @@ int bch2_inode_unpack(struct bkey_s_c k,
}
}
+int bch2_inode_unpack(struct bkey_s_c k,
+ struct bch_inode_unpacked *unpacked)
+{
+ if (likely(k.k->type == KEY_TYPE_inode_v3))
+ return bch2_inode_unpack_v3(k, unpacked);
+ return bch2_inode_unpack_slowpath(k, unpacked);
+}
+
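The v3 unpacker reads each field as one or two varints (fields wider than 64 bits spill into v[1]) and rejects values that don't fit the destination width via the "v[1] || v[0] != unpacked->_name" check; the common inode_v3 case is then dispatched inline with likely(), pushing the old formats into a noinline slowpath. bch2_varint_decode_fast() uses a length-prefixed format; the sketch below shows only the varint idea, in ULEB128 form:

    /* ULEB128-style varint decode, for illustration only. */
    #include <stdint.h>
    #include <stdio.h>

    /* Returns bytes consumed, or -1 on truncated/oversized input. */
    static int varint_decode(const uint8_t *in, const uint8_t *end, uint64_t *out)
    {
            uint64_t v = 0;
            unsigned shift = 0;
            const uint8_t *p = in;

            while (p < end && shift < 64) {
                    v |= (uint64_t) (*p & 0x7f) << shift;
                    if (!(*p++ & 0x80)) {
                            *out = v;
                            return p - in;
                    }
                    shift += 7;
            }
            return -1;
    }

    int main(void)
    {
            const uint8_t buf[] = { 0xe5, 0x8e, 0x26 };     /* 624485 */
            uint64_t v;
            int ret = varint_decode(buf, buf + sizeof(buf), &v);

            printf("%d bytes -> %llu\n", ret, (unsigned long long) v);
            return 0;
    }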
int bch2_inode_peek(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
@@ -288,11 +359,29 @@ int bch2_inode_write(struct btree_trans *trans,
if (IS_ERR(inode_p))
return PTR_ERR(inode_p);
- bch2_inode_pack(trans->c, inode_p, inode);
+ bch2_inode_pack_inlined(inode_p, inode);
inode_p->inode.k.p.snapshot = iter->snapshot;
return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
}
+struct bkey_s_c bch2_inode_to_v3(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bch_inode_unpacked u;
+ struct bkey_inode_buf *inode_p;
+ int ret;
+
+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+ if (IS_ERR(inode_p))
+ return bkey_s_c_err(PTR_ERR(inode_p));
+
+ ret = bch2_inode_unpack(k, &u);
+ if (ret)
+ return bkey_s_c_err(ret);
+
+ bch2_inode_pack(inode_p, &u);
+ return bkey_i_to_s_c(&inode_p->inode.k_i);
+}
+
static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
{
struct bch_inode_unpacked unpacked;
@@ -307,7 +396,7 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
return -EINVAL;
}
- if (bch2_inode_unpack(k, &unpacked)){
+ if (bch2_inode_unpack(k, &unpacked)) {
prt_printf(err, "invalid variable length fields");
return -EINVAL;
}
@@ -378,15 +467,48 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
return __bch2_inode_invalid(k, err);
}
-static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
+int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
+ int rw, struct printbuf *err)
+{
+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+
+ if (bkey_val_bytes(k.k) < sizeof(*inode.v)) {
+ prt_printf(err, "incorrect value size (%zu < %zu)",
+ bkey_val_bytes(k.k), sizeof(*inode.v));
+ return -EINVAL;
+ }
+
+ if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
+ INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) {
+ prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)",
+ INODEv3_FIELDS_START(inode.v),
+ INODEv3_FIELDS_START_INITIAL,
+ bkey_val_u64s(inode.k));
+ return -EINVAL;
+ }
+
+ if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
+ prt_printf(err, "invalid str hash type (%llu >= %u)",
+ INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
+ return -EINVAL;
+ }
+
+ return __bch2_inode_invalid(k, err);
+}
+
+static void __bch2_inode_unpacked_to_text(struct printbuf *out,
+ struct bch_inode_unpacked *inode)
{
- prt_printf(out, "mode %o flags %x journal_seq %llu",
+ prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
inode->bi_mode, inode->bi_flags,
- inode->bi_journal_seq);
+ inode->bi_journal_seq,
+ inode->bi_size,
+ inode->bi_sectors,
+ inode->bi_version);
#define x(_name, _bits) \
prt_printf(out, " "#_name " %llu", (u64) inode->_name);
- BCH_INODE_FIELDS()
+ BCH_INODE_FIELDS_v3()
#undef x
}
@@ -396,8 +518,7 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked
__bch2_inode_unpacked_to_text(out, inode);
}
-void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
- struct bkey_s_c k)
+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
struct bch_inode_unpacked inode;
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 2ac2fc10..2915f4f9 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -2,12 +2,14 @@
#ifndef _BCACHEFS_INODE_H
#define _BCACHEFS_INODE_H
+#include "bkey.h"
#include "opts.h"
extern const char * const bch2_inode_opts[];
int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
+int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_inode (struct bkey_ops) { \
@@ -24,10 +26,18 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.atomic_trigger = bch2_mark_inode, \
}
+#define bch2_bkey_ops_inode_v3 (struct bkey_ops) { \
+ .key_invalid = bch2_inode_v3_invalid, \
+ .val_to_text = bch2_inode_to_text, \
+ .trans_trigger = bch2_trans_mark_inode, \
+ .atomic_trigger = bch2_mark_inode, \
+}
+
static inline bool bkey_is_inode(const struct bkey *k)
{
return k->type == KEY_TYPE_inode ||
- k->type == KEY_TYPE_inode_v2;
+ k->type == KEY_TYPE_inode_v2 ||
+ k->type == KEY_TYPE_inode_v3;
}
int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c,
@@ -51,25 +61,28 @@ struct bch_inode_unpacked {
u64 bi_inum;
u64 bi_journal_seq;
__le64 bi_hash_seed;
+ u64 bi_size;
+ u64 bi_sectors;
+ u64 bi_version;
u32 bi_flags;
u16 bi_mode;
#define x(_name, _bits) u##_bits _name;
- BCH_INODE_FIELDS()
+ BCH_INODE_FIELDS_v3()
#undef x
};
struct bkey_inode_buf {
- struct bkey_i_inode_v2 inode;
+ struct bkey_i_inode_v3 inode;
#define x(_name, _bits) + 8 + _bits / 8
- u8 _pad[0 + BCH_INODE_FIELDS()];
+ u8 _pad[0 + BCH_INODE_FIELDS_v3()];
#undef x
} __attribute__((packed, aligned(8)));
-void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
- const struct bch_inode_unpacked *);
+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
+struct bkey_s_c bch2_inode_to_v3(struct btree_trans *, struct bkey_s_c);
void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 558d0c23..5971569e 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -242,8 +242,7 @@ int bch2_extent_update(struct btree_trans *trans,
s64 *i_sectors_delta_total,
bool check_enospc)
{
- struct btree_iter inode_iter;
- struct bch_inode_unpacked inode_u;
+ struct btree_iter inode_iter = { NULL };
struct bpos next_pos;
bool usage_increasing;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
@@ -283,32 +282,67 @@ int bch2_extent_update(struct btree_trans *trans,
return ret;
}
- ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum,
- BTREE_ITER_INTENT);
- if (ret)
- return ret;
+ if (new_i_size || i_sectors_delta) {
+ struct bkey_s_c k;
+ struct bkey_s_c_inode_v3 inode;
+ struct bkey_i_inode_v3 *new_inode;
+ bool i_size_update;
+
+ bch2_trans_iter_init(trans, &inode_iter, BTREE_ID_inodes,
+ SPOS(0, inum.inum, iter->snapshot),
+ BTREE_ITER_INTENT|BTREE_ITER_CACHED);
+ k = bch2_btree_iter_peek_slot(&inode_iter);
+ ret = bkey_err(k);
+ if (unlikely(ret))
+ goto err;
+
+ ret = bkey_is_inode(k.k) ? 0 : -ENOENT;
+ if (unlikely(ret))
+ goto err;
+
+ if (unlikely(k.k->type != KEY_TYPE_inode_v3)) {
+ k = bch2_inode_to_v3(trans, k);
+ ret = bkey_err(k);
+ if (unlikely(ret))
+ goto err;
+ }
+
+ inode = bkey_s_c_to_inode_v3(k);
+ i_size_update = !(le64_to_cpu(inode.v->bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
+ new_i_size > le64_to_cpu(inode.v->bi_size);
+
+ if (!i_sectors_delta && !i_size_update)
+ goto no_inode_update;
+
+ new_inode = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(new_inode);
+ if (unlikely(ret))
+ goto err;
- if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
- new_i_size > inode_u.bi_size)
- inode_u.bi_size = new_i_size;
+ bkey_reassemble(&new_inode->k_i, k);
- inode_u.bi_sectors += i_sectors_delta;
+ if (i_size_update)
+ new_inode->v.bi_size = cpu_to_le64(new_i_size);
+ le64_add_cpu(&new_inode->v.bi_sectors, i_sectors_delta);
+ ret = bch2_trans_update(trans, &inode_iter, &new_inode->k_i, 0);
+ if (unlikely(ret))
+ goto err;
+ }
+no_inode_update:
ret = bch2_trans_update(trans, iter, k, 0) ?:
- bch2_inode_write(trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(trans, disk_res, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL);
- bch2_trans_iter_exit(trans, &inode_iter);
-
- if (ret)
- return ret;
+ if (unlikely(ret))
+ goto err;
if (i_sectors_delta_total)
*i_sectors_delta_total += i_sectors_delta;
bch2_btree_iter_set_pos(iter, next_pos);
-
- return 0;
+err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+ return ret;
}
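This is the core of the commit: because inode_v3 stores bi_size, bi_sectors, and bi_version as fixed-width fields ahead of the varint-packed tail, bch2_extent_update() can reassemble the key and bump those fields directly instead of unpacking and repacking the whole inode on every extent write. A freestanding sketch of the idea (the struct layout here is illustrative, not the on-disk format):

    /* Fixed-width fields can be edited in place, no unpack/pack cycle. */
    #include <stdint.h>
    #include <stdio.h>

    struct inode_v3 { uint64_t bi_size, bi_sectors; };

    static void le64_add_cpu(uint64_t *v, int64_t delta)
    {
            /* plain add on little-endian hosts; the kernel helper
             * byte-swaps as needed on big-endian */
            *v += (uint64_t) delta;
    }

    int main(void)
    {
            struct inode_v3 inode = { .bi_size = 4096, .bi_sectors = 8 };
            uint64_t new_i_size = 8192;

            if (new_i_size > inode.bi_size)         /* i_size update */
                    inode.bi_size = new_i_size;
            le64_add_cpu(&inode.bi_sectors, 8);     /* i_sectors delta */

            printf("%llu %llu\n",
                   (unsigned long long) inode.bi_size,
                   (unsigned long long) inode.bi_sectors);
            return 0;
    }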
/*
@@ -926,8 +960,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
saved_iter = dst->bi_iter;
do {
- struct bch_extent_crc_unpacked crc =
- (struct bch_extent_crc_unpacked) { 0 };
+ struct bch_extent_crc_unpacked crc = { 0 };
struct bversion version = op->version;
size_t dst_len, src_len;
@@ -979,6 +1012,8 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
!crc_is_compressed(crc) &&
bch2_csum_type_is_encryption(op->crc.csum_type) ==
bch2_csum_type_is_encryption(op->csum_type)) {
+ u8 compression_type = crc.compression_type;
+ u16 nonce = crc.nonce;
/*
* Note: when we're using rechecksum(), we need to be
* checksumming @src because it has all the data our
@@ -997,6 +1032,13 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
bio_sectors(src) - (src_len >> 9),
op->csum_type))
goto csum_err;
+ /*
+			 * bch2_rechecksum_bio() sets compression_type on crc
+			 * from op->crc; this isn't always correct, as we're
+			 * sometimes changing an extent from uncompressed to
+			 * incompressible.
+ */
+ crc.compression_type = compression_type;
+ crc.nonce = nonce;
} else {
if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
bch2_rechecksum_bio(c, src, version, op->crc,
@@ -1115,8 +1157,8 @@ again:
BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl);
EBUG_ON(!wp);
- if (unlikely(IS_ERR(wp))) {
- if (unlikely(PTR_ERR(wp) != -EAGAIN)) {
+ if (IS_ERR(wp)) {
+ if (unlikely(wp != ERR_PTR(-EAGAIN))) {
ret = PTR_ERR(wp);
goto err;
}
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index ab594623..95c29229 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -739,7 +739,7 @@ int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
return ret;
entry = container_of(journal_res_entry(j, &res),
- struct jset_entry_log, entry);;
+ struct jset_entry_log, entry);
memset(entry, 0, u64s * sizeof(u64));
entry->entry.type = BCH_JSET_ENTRY_log;
entry->entry.u64s = u64s - 1;
@@ -796,10 +796,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bch2_journal_block(&c->journal);
}
- bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL);
- ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL);
- new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL);
- new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL);
+ bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
+ ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);
+ new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL);
+ new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
if (!bu || !ob || !new_buckets || !new_bucket_seq) {
ret = -ENOMEM;
goto err_unblock;
@@ -1265,7 +1265,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
rcu_read_lock();
s = READ_ONCE(j->reservations);
- prt_printf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size);
+ prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size);
prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j));
prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk);
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index d3caa7ea..9428f423 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -110,6 +110,7 @@
*/
#include <linux/hash.h>
+#include <linux/prefetch.h>
#include "journal_types.h"
@@ -304,15 +305,26 @@ static inline int journal_res_get_fast(struct journal *j,
{
union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter);
+ unsigned u64s, offset;
do {
old.v = new.v = v;
/*
+ * Round up the end of the journal reservation to the next
+ * cacheline boundary:
+ */
+ u64s = res->u64s;
+ offset = sizeof(struct jset) / sizeof(u64) +
+ new.cur_entry_offset + u64s;
+ u64s += ((offset - 1) & ((SMP_CACHE_BYTES / sizeof(u64)) - 1)) + 1;
+
+ /*
* Check if there is still room in the current journal
* entry:
*/
- if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
+ if (new.cur_entry_offset + u64s > j->cur_entry_u64s)
return 0;
EBUG_ON(!journal_state_count(new, new.idx));
@@ -320,7 +332,7 @@ static inline int journal_res_get_fast(struct journal *j,
if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark)
return 0;
- new.cur_entry_offset += res->u64s;
+ new.cur_entry_offset += u64s;
journal_state_inc(&new);
/*
@@ -337,8 +349,15 @@ static inline int journal_res_get_fast(struct journal *j,
res->ref = true;
res->idx = old.idx;
+ res->u64s = u64s;
res->offset = old.cur_entry_offset;
res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
+
+ offset = res->offset;
+ while (offset < res->offset + res->u64s) {
+ prefetchw(vstruct_idx(j->buf[res->idx].data, offset));
+ offset += SMP_CACHE_BYTES / sizeof(u64);
+ }
return 1;
}
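journal_res_get_fast() now pads each reservation so it ends on a cacheline boundary and prefetches those lines for write before the caller fills them, so concurrent reservations don't false-share a line. A userspace sketch of the round-up and prefetch loop (assuming 64-byte lines; the kernel uses SMP_CACHE_BYTES):

    /* Round a reservation up to whole cachelines, then touch each
     * line for write, like the prefetchw() loop above. */
    #include <stdint.h>
    #include <stdio.h>

    #define CACHELINE_BYTES 64
    #define CACHELINE_U64S  (CACHELINE_BYTES / sizeof(uint64_t))

    static unsigned round_up_u64s(unsigned n)
    {
            return (n + CACHELINE_U64S - 1) & ~(CACHELINE_U64S - 1);
    }

    int main(void)
    {
            uint64_t buf[4 * CACHELINE_U64S];
            unsigned u64s = round_up_u64s(13), i;

            for (i = 0; i < u64s; i += CACHELINE_U64S)
                    __builtin_prefetch(&buf[i], 1 /* write */);

            printf("%u\n", u64s);   /* 16: 13 u64s rounded to two lines */
            return 0;
    }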
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index e69595bd..e873ce2a 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -232,7 +232,7 @@ void bch2_journal_space_available(struct journal *j)
if ((j->space[journal_space_clean_ondisk].next_entry <
j->space[journal_space_clean_ondisk].total) &&
(clean - clean_ondisk <= total / 8) &&
- (clean_ondisk * 2 > clean ))
+ (clean_ondisk * 2 > clean))
set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
else
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
@@ -363,7 +363,7 @@ static inline void __journal_pin_drop(struct journal *j,
list_del_init(&pin->list);
/*
- * Unpinning a journal entry make make journal_next_bucket() succeed, if
+ * Unpinning a journal entry may make journal_next_bucket() succeed if
* writing a new last_seq will now make another bucket available:
*/
if (atomic_dec_and_test(&pin_list->count) &&
diff --git a/libbcachefs/journal_sb.c b/libbcachefs/journal_sb.c
index cfdbd92d..c19db042 100644
--- a/libbcachefs/journal_sb.c
+++ b/libbcachefs/journal_sb.c
@@ -31,7 +31,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb,
if (!nr)
return 0;
- b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
+ b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL);
if (!b)
return -ENOMEM;
@@ -114,7 +114,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
if (!nr)
return 0;
- b = kmalloc_array(sizeof(*b), nr, GFP_KERNEL);
+ b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
if (!b)
return -ENOMEM;
diff --git a/libbcachefs/keylist.c b/libbcachefs/keylist.c
index cda77835..5e85055b 100644
--- a/libbcachefs/keylist.c
+++ b/libbcachefs/keylist.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey.h"
#include "keylist.h"
int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 55fdacad..74869204 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -497,7 +497,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
/*
* The iterator gets unlocked by __bch2_read_extent - need to
* save a copy of @k elsewhere:
- */
+ */
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
@@ -868,7 +868,7 @@ static bool migrate_pred(struct bch_fs *c, void *arg,
i++;
}
- return data_opts->rewrite_ptrs != 0;;
+ return data_opts->rewrite_ptrs != 0;
}
static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 18f6ec5c..ea7810a1 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -225,7 +225,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
.size = max_t(size_t, keys->size, 8) * 2,
};
- new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL);
+ new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL);
if (!new_keys.d) {
bch_err(c, "%s: error allocating new key array (size %zu)",
__func__, new_keys.size);
@@ -502,7 +502,7 @@ static int journal_keys_sort(struct bch_fs *c)
keys->size = roundup_pow_of_two(nr_keys);
- keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL);
+ keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
if (!keys->d)
return -ENOMEM;
@@ -1092,6 +1092,9 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
+ } else if (c->sb.version < bcachefs_metadata_version_inode_v3) {
+ bch_info(c, "version prior to inode_v3, upgrade required");
+ c->opts.version_upgrade = true;
}
}
@@ -1458,7 +1461,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
- if (c->sb.version < bcachefs_metadata_version_backpointers)
+ if (c->sb.version < bcachefs_metadata_version_inode_v3)
c->opts.version_upgrade = true;
if (c->opts.version_upgrade) {
@@ -1537,7 +1540,7 @@ int bch2_fs_initialize(struct bch_fs *c)
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO;
root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
- bch2_inode_pack(c, &packed_inode, &root_inode);
+ bch2_inode_pack(&packed_inode, &root_inode);
packed_inode.inode.k.p.snapshot = U32_MAX;
err = "error creating root directory";
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index 87820b2e..cc34b380 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_REPLICAS_H
#define _BCACHEFS_REPLICAS_H
+#include "bkey.h"
#include "eytzinger.h"
#include "replicas_types.h"
diff --git a/libbcachefs/siphash.c b/libbcachefs/siphash.c
index c062edb3..dc1a27cc 100644
--- a/libbcachefs/siphash.c
+++ b/libbcachefs/siphash.c
@@ -160,7 +160,7 @@ u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
memset(ctx, 0, sizeof(*ctx));
- return (r);
+ return r;
}
u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index cbc5979a..60c1f03c 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -100,8 +100,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
void bch2_free_super(struct bch_sb_handle *sb)
{
- if (sb->bio)
- kfree(sb->bio);
+ kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->bdev))
blkdev_put(sb->bdev, sb->mode);
@@ -149,8 +148,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
- if (sb->bio)
- kfree(sb->bio);
+ kfree(sb->bio);
sb->bio = bio;
}
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 3f674bf0..5be4c40a 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -327,26 +327,12 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
{
int ret;
- ret = bch2_gc_thread_start(c);
- if (ret) {
- bch_err(c, "error starting gc thread");
- return ret;
- }
-
- ret = bch2_copygc_start(c);
- if (ret) {
- bch_err(c, "error starting copygc thread");
- return ret;
- }
-
ret = bch2_rebalance_start(c);
if (ret) {
bch_err(c, "error starting rebalance thread");
return ret;
}
- schedule_work(&c->ec_stripe_delete_work);
-
return 0;
}
@@ -385,6 +371,20 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ ret = bch2_gc_thread_start(c);
+ if (ret) {
+ bch_err(c, "error starting gc thread");
+ return ret;
+ }
+
+ ret = bch2_copygc_start(c);
+ if (ret) {
+ bch_err(c, "error starting copygc thread");
+ return ret;
+ }
+
+ schedule_work(&c->ec_stripe_delete_work);
+
bch2_do_discards(c);
bch2_do_invalidates(c);
@@ -463,8 +463,8 @@ static void __bch2_fs_free(struct bch_fs *c)
kfree(c->unused_inode_hints);
free_heap(&c->copygc_heap);
- if (c->io_complete_wq )
- destroy_workqueue(c->io_complete_wq );
+ if (c->io_complete_wq)
+ destroy_workqueue(c->io_complete_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
if (c->btree_io_complete_wq)
@@ -711,7 +711,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto err;
pr_uuid(&name, c->sb.user_uuid.b);
- strlcpy(c->name, name.buf, sizeof(c->name));
+ strscpy(c->name, name.buf, sizeof(c->name));
printbuf_exit(&name);
ret = name.allocation_failure ? -ENOMEM : 0;
@@ -1784,9 +1784,8 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
}
ret = bch2_trans_mark_dev_sb(c, ca);
- if (ret) {
+ if (ret)
goto err;
- }
mutex_lock(&c->sb_lock);
mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 103fde97..0f45aef7 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -175,7 +175,7 @@ read_attribute(minor);
read_attribute(bucket_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
-read_attribute(durability);
+rw_attribute(durability);
read_attribute(iodone);
read_attribute(io_latency_read);
@@ -425,7 +425,7 @@ SHOW(bch2_fs)
bch2_btree_updates_to_text(out, c);
if (attr == &sysfs_btree_cache)
- bch2_btree_cache_to_text(out, c);
+ bch2_btree_cache_to_text(out, &c->btree_cache);
if (attr == &sysfs_btree_key_cache)
bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
@@ -907,6 +907,19 @@ STORE(bch2_dev)
mutex_unlock(&c->sb_lock);
}
+ if (attr == &sysfs_durability) {
+ u64 v = strtoul_or_return(buf);
+
+ mutex_lock(&c->sb_lock);
+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+
+ if (v != BCH_MEMBER_DURABILITY(mi)) {
+ SET_BCH_MEMBER_DURABILITY(mi, v + 1);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+ }
+
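durability becomes writable through sysfs here; the "v + 1" is because, as I read the member-flags encoding, BCH_MEMBER_DURABILITY() uses 0 to mean "never set" (treated as durability 1), so a user-visible durability d is stored as d + 1. A toy sketch of that encode/decode:

    /* Sketch of the assumed encoding: field 0 = unset (default 1),
     * field n+1 = durability n. */
    #include <stdio.h>

    static unsigned durability_decode(unsigned field)
    {
            return field ? field - 1 : 1;
    }

    int main(void)
    {
            unsigned v = 2;                 /* value written via sysfs */
            unsigned field = v + 1;         /* SET_BCH_MEMBER_DURABILITY(mi, v + 1) */

            printf("%u\n", durability_decode(field));       /* 2 */
            return 0;
    }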
if (attr == &sysfs_label) {
char *tmp;
int ret;
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index f08215af..62fa6620 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -433,7 +433,7 @@ static void pr_time_units(struct printbuf *out, u64 ns)
static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
{
- prt_printf(out, name);
+ prt_str(out, name);
prt_tab(out);
pr_time_units(out, ns);
prt_newline(out);
@@ -786,8 +786,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
}
}
-#include "eytzinger.h"
-
static int alignment_ok(const void *base, size_t align)
{
return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
diff --git a/linux/string.c b/linux/string.c
index fd2797ea..a32a8995 100644
--- a/linux/string.c
+++ b/linux/string.c
@@ -21,8 +21,10 @@
#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <string.h>
+#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/string.h>
@@ -62,6 +64,31 @@ size_t strlcpy(char *dest, const char *src, size_t size)
return ret;
}
+ssize_t strscpy(char *dest, const char *src, size_t count)
+{
+ long res = 0;
+
+ if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
+ return -E2BIG;
+
+ while (count) {
+ char c;
+
+ c = src[res];
+ dest[res] = c;
+ if (!c)
+ return res;
+ res++;
+ count--;
+ }
+
+ /* Hit buffer length without finding a NUL; force NUL-termination. */
+ if (res)
+ dest[res-1] = '\0';
+
+ return -E2BIG;
+}
+
void memzero_explicit(void *s, size_t count)
{
memset(s, 0, count);
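The strlcpy → strscpy conversions in fs.c and super.c above need this compat implementation: strscpy() always NUL-terminates and returns the copied length, or -E2BIG on truncation, whereas strlcpy() returns strlen(src) and so must walk the entire source even when the destination is already full. A usage sketch against the function defined above:

    /* Demonstrates the return-value contract of the strscpy() added
     * in this patch (assumed linked in alongside this file). */
    #include <stdio.h>
    #include <sys/types.h>

    extern ssize_t strscpy(char *dest, const char *src, size_t count);

    int main(void)
    {
            char buf[8];

            printf("%zd\n", strscpy(buf, "short", sizeof(buf)));        /* 5 */
            printf("%zd\n", strscpy(buf, "far too long", sizeof(buf))); /* -E2BIG */
            printf("%s\n", buf);    /* "far too": truncated, NUL-terminated */
            return 0;
    }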
diff --git a/linux/string_helpers.c b/linux/string_helpers.c
index 3d720bc0..29c498ad 100644
--- a/linux/string_helpers.c
+++ b/linux/string_helpers.c
@@ -52,7 +52,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
static const unsigned int rounding[] = { 500, 50, 5 };
int i = 0, j;
u32 remainder = 0, sf_cap;
- char tmp[8];
+ char tmp[12];
const char *unit;
tmp[0] = '\0';