author     Kent Overstreet <kent.overstreet@gmail.com>   2020-05-13 17:59:29 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>   2020-05-13 17:59:29 -0400
commit     323707f353a328a569aa55956288aa91131eb727 (patch)
tree       38ff10a0690ba5223b5860ea44bfa01d6d130428
parent     e75846f83e05114fa4a1dbcb2ca4cd7f6b06030b (diff)

    Merge with 91fedfccb2 bcachefs: Fix setquota
-rw-r--r--  fs/bcachefs/alloc_background.c         16
-rw-r--r--  fs/bcachefs/bcachefs.h                  2
-rw-r--r--  fs/bcachefs/btree_io.c                 18
-rw-r--r--  fs/bcachefs/btree_update_interior.c   143
-rw-r--r--  fs/bcachefs/checksum.c                 32
-rw-r--r--  fs/bcachefs/checksum.h                  6
-rw-r--r--  fs/bcachefs/compress.c                 71
-rw-r--r--  fs/bcachefs/fs-io.c                   272
-rw-r--r--  fs/bcachefs/fs-io.h                     4
-rw-r--r--  fs/bcachefs/fs.c                       13
-rw-r--r--  fs/bcachefs/io.c                       33
-rw-r--r--  fs/bcachefs/io.h                        1
-rw-r--r--  fs/bcachefs/journal.h                   2
-rw-r--r--  fs/bcachefs/move.c                      4
-rw-r--r--  fs/bcachefs/quota.c                    59
-rw-r--r--  fs/bcachefs/super.c                     3
-rw-r--r--  fs/bcachefs/util.h                     29
-rw-r--r--  kernel/locking/six.c                    2

18 files changed, 279 insertions(+), 431 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index b2d1b8f9c9b8..c37945189c68 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -315,7 +315,9 @@ retry:
bch2_trans_update(trans, iter, &a->k_i,
BTREE_TRIGGER_NORUN);
ret = bch2_trans_commit(trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|flags);
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ flags);
err:
if (ret == -EINTR)
goto retry;
@@ -1033,7 +1035,16 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
set_current_state(TASK_INTERRUPTIBLE);
spin_lock(&c->freelist_lock);
- for (i = 0; i < RESERVE_NR; i++)
+ for (i = 0; i < RESERVE_NR; i++) {
+
+ /*
+ * Don't strand buckets on the copygc freelist until
+ * after recovery is finished:
+ */
+ if (!test_bit(BCH_FS_STARTED, &c->flags) &&
+ i == RESERVE_MOVINGGC)
+ continue;
+
if (fifo_push(&ca->free[i], bucket)) {
fifo_pop(&ca->free_inc, bucket);
@@ -1043,6 +1054,7 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
spin_unlock(&c->freelist_lock);
goto out;
}
+ }
if (ca->allocator_state != ALLOCATOR_BLOCKED_FULL) {
ca->allocator_state = ALLOCATOR_BLOCKED_FULL;
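
The allocator hunks above add BTREE_INSERT_USE_RESERVE to the commit flags and stop pushing invalidated buckets onto the copygc (RESERVE_MOVINGGC) freelist until the filesystem has finished starting up; the super.c hunk further down sets BCH_FS_STARTED earlier so this check takes effect during recovery. A reduced, self-contained sketch of the skip logic follows; the types and names are illustrative only, not the real bcachefs ones:

/*
 * Simplified model of push_invalidated_bucket(): a freed bucket is offered
 * to each reserve freelist in order, but the copygc list is skipped until
 * the filesystem has started, so buckets aren't stranded there during
 * recovery.
 */
#include <stdbool.h>
#include <stddef.h>

enum { RESERVE_BTREE, RESERVE_MOVINGGC, RESERVE_NONE, RESERVE_NR };

struct freelist { size_t nr, size; unsigned long *buckets; };

static bool freelist_push(struct freelist *f, unsigned long bucket)
{
	if (f->nr == f->size)
		return false;
	f->buckets[f->nr++] = bucket;
	return true;
}

static bool offer_bucket(struct freelist free[RESERVE_NR],
			 unsigned long bucket, bool fs_started)
{
	for (unsigned i = 0; i < RESERVE_NR; i++) {
		/* don't strand buckets on the copygc list during recovery: */
		if (!fs_started && i == RESERVE_MOVINGGC)
			continue;

		if (freelist_push(&free[i], bucket))
			return true;
	}

	return false;	/* all freelists full: the allocator blocks */
}
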
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 289d7ae4e98c..fa9593764f0c 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -732,7 +732,7 @@ struct bch_fs {
ZSTD_parameters zstd_params;
struct crypto_shash *sha256;
- struct crypto_skcipher *chacha20;
+ struct crypto_sync_skcipher *chacha20;
struct crypto_shash *poly1305;
atomic64_t key_version;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 61d4c98d3476..63063748d4f5 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -620,7 +620,7 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
bytes);
- nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE));
+ nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
}
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
@@ -736,6 +736,17 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
struct btree_node *bn =
container_of(i, struct btree_node, keys);
/* These indicate that we read the wrong btree node: */
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ struct bch_btree_ptr_v2 *bp =
+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+ /* XXX endianness */
+ btree_err_on(bp->seq != bn->keys.seq,
+ BTREE_ERR_MUST_RETRY, c, b, NULL,
+ "incorrect sequence number (wrong btree node)");
+ }
+
btree_err_on(BTREE_NODE_ID(bn) != b->btree_id,
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect btree id");
@@ -1626,6 +1637,11 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
* reflect that those writes were done and the data flushed from the
* journal:
*
+ * Also on journal error, the pending write may have updates that were
+ * never journalled (interior nodes, see btree_update_nodes_written()) -
+ * it's critical that we don't do the write in that case otherwise we
+ * will have updates visible that weren't in the journal:
+ *
* Make sure to update b->written so bch2_btree_init_next() doesn't
* break:
*/
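
The validate_bset() hunk above cross-checks the sequence number stored in a KEY_TYPE_btree_ptr_v2 pointer against the seq in the node that was actually read, so a stale or misdirected read is reported as "wrong btree node" and retried. Stripped down to the comparison itself (reduced structures, not the real bcachefs layout):

/*
 * A v2 btree pointer records the seq of the node it expects to find; on
 * read, the node header's seq must match or the read must be retried.
 */
#include <stdint.h>
#include <stdbool.h>

struct btree_ptr_v2  { uint64_t seq; /* expected node sequence number   */ };
struct btree_node_hdr { uint64_t seq; /* seq written into the node on disk */ };

static bool btree_node_matches_ptr(const struct btree_ptr_v2 *bp,
				   const struct btree_node_hdr *bn)
{
	/* mismatch => we read the wrong btree node */
	return bp->seq == bn->seq;
}
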
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 98782d55b846..75b70187a954 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -586,12 +586,12 @@ static void __bch2_btree_update_free(struct btree_update *as)
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
- BUG_ON((as->nr_new_nodes || as->nr_pending) &&
- !bch2_journal_error(&c->journal));;
+ BUG_ON(as->nr_new_nodes || as->nr_pending);
if (as->reserve)
bch2_btree_reserve_put(c, as->reserve);
+ list_del(&as->unwritten_list);
list_del(&as->list);
closure_debug_destroy(&as->cl);
@@ -609,15 +609,28 @@ static void bch2_btree_update_free(struct btree_update *as)
mutex_unlock(&c->btree_interior_update_lock);
}
+static inline bool six_trylock_intentwrite(struct six_lock *lock)
+{
+ if (!six_trylock_intent(lock))
+ return false;
+
+ if (!six_trylock_write(lock)) {
+ six_unlock_intent(lock);
+ return false;
+ }
+
+ return true;
+}
+
static void btree_update_nodes_written(struct closure *cl)
{
struct btree_update *as = container_of(cl, struct btree_update, cl);
- struct btree *new_nodes[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES];
- unsigned nr_new_nodes;
+ struct btree *nodes_need_write[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES + 1];
+ unsigned nr_nodes_need_write;
struct journal_res res = { 0 };
struct bch_fs *c = as->c;
+ struct btree_root *r;
struct btree *b;
- struct bset *i;
int ret;
/*
@@ -628,7 +641,7 @@ static void btree_update_nodes_written(struct closure *cl)
mutex_lock(&c->btree_interior_update_lock);
as->nodes_written = true;
again:
- nr_new_nodes = 0;
+ nr_nodes_need_write = 0;
as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
struct btree_update, unwritten_list);
if (!as || !as->nodes_written) {
@@ -637,31 +650,57 @@ again:
}
b = as->b;
- if (b && !six_trylock_intent(&b->lock)) {
+ if (b && !six_trylock_intentwrite(&b->lock)) {
mutex_unlock(&c->btree_interior_update_lock);
+
btree_node_lock_type(c, b, SIX_LOCK_intent);
+ six_lock_write(&b->lock);
+
+ six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
+
mutex_lock(&c->btree_interior_update_lock);
goto again;
}
- list_del(&as->unwritten_list);
-
ret = bch2_journal_res_get(&c->journal, &res, as->journal_u64s,
+ JOURNAL_RES_GET_NONBLOCK|
JOURNAL_RES_GET_RESERVED);
- if (ret) {
- BUG_ON(!bch2_journal_error(&c->journal));
- /* can't unblock btree writes */
- goto free_update;
+ if (ret == -EAGAIN) {
+ unsigned u64s = as->journal_u64s;
+
+ if (b) {
+ six_unlock_write(&b->lock);
+ six_unlock_intent(&b->lock);
+ }
+
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ ret = bch2_journal_res_get(&c->journal, &res, u64s,
+ JOURNAL_RES_GET_CHECK|
+ JOURNAL_RES_GET_RESERVED);
+ if (!ret) {
+ mutex_lock(&c->btree_interior_update_lock);
+ goto again;
+ }
}
- {
+ if (!ret) {
struct journal_buf *buf = &c->journal.buf[res.idx];
struct jset_entry *entry = vstruct_idx(buf->data, res.offset);
res.offset += as->journal_u64s;
res.u64s -= as->journal_u64s;
memcpy_u64s(entry, as->journal_entries, as->journal_u64s);
+ } else {
+ /*
+ * On journal error we have to run most of the normal path so
+ * that shutdown works - unblocking btree node writes in
+ * particular and writing them if needed - except for
+ * journalling the update:
+ */
+
+ BUG_ON(!bch2_journal_error(&c->journal));
}
switch (as->mode) {
@@ -669,26 +708,41 @@ again:
BUG();
case BTREE_INTERIOR_UPDATING_NODE:
/* @b is the node we did the final insert into: */
- BUG_ON(!res.ref);
- six_lock_write(&b->lock);
+ /*
+ * On failure to get a journal reservation, we still have to
+ * unblock the write and allow most of the write path to happen
+ * so that shutdown works, but the i->journal_seq mechanism
+ * won't work to prevent the btree write from being visible (we
+ * didn't get a journal sequence number) - instead
+ * __bch2_btree_node_write() doesn't do the actual write if
+ * we're in journal error state:
+ */
+
list_del(&as->write_blocked_list);
- i = btree_bset_last(b);
- i->journal_seq = cpu_to_le64(
- max(res.seq,
- le64_to_cpu(i->journal_seq)));
+ if (!ret) {
+ struct bset *i = btree_bset_last(b);
+
+ i->journal_seq = cpu_to_le64(
+ max(res.seq,
+ le64_to_cpu(i->journal_seq)));
+
+ bch2_btree_add_journal_pin(c, b, res.seq);
+ }
+
+ nodes_need_write[nr_nodes_need_write++] = b;
- bch2_btree_add_journal_pin(c, b, res.seq);
six_unlock_write(&b->lock);
+ six_unlock_intent(&b->lock);
break;
case BTREE_INTERIOR_UPDATING_AS:
BUG_ON(b);
break;
- case BTREE_INTERIOR_UPDATING_ROOT: {
- struct btree_root *r = &c->btree_roots[as->btree_id];
+ case BTREE_INTERIOR_UPDATING_ROOT:
+ r = &c->btree_roots[as->btree_id];
BUG_ON(b);
@@ -700,41 +754,25 @@ again:
mutex_unlock(&c->btree_root_lock);
break;
}
- }
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_res_put(&c->journal, &res);
bch2_journal_preres_put(&c->journal, &as->journal_preres);
-free_update:
- /* Do btree write after dropping journal res: */
- if (b) {
- /*
- * b->write_blocked prevented it from being written, so
- * write it now if it needs to be written:
- */
- btree_node_write_if_need(c, b, SIX_LOCK_intent);
- six_unlock_intent(&b->lock);
- }
-
- if (!ret) {
- nr_new_nodes = as->nr_new_nodes;
- memcpy(new_nodes,
- as->new_nodes,
- as->nr_new_nodes * sizeof(struct btree *));
- while (as->nr_new_nodes) {
- struct btree *b = as->new_nodes[--as->nr_new_nodes];
+ while (as->nr_new_nodes) {
+ b = as->new_nodes[--as->nr_new_nodes];
- BUG_ON(b->will_make_reachable != (unsigned long) as);
- b->will_make_reachable = 0;
- }
+ BUG_ON(b->will_make_reachable != (unsigned long) as);
+ b->will_make_reachable = 0;
- while (as->nr_pending)
- bch2_btree_node_free_ondisk(c,
- &as->pending[--as->nr_pending], res.seq);
+ nodes_need_write[nr_nodes_need_write++] = b;
}
+ while (as->nr_pending)
+ bch2_btree_node_free_ondisk(c,
+ &as->pending[--as->nr_pending], res.seq);
+
__bch2_btree_update_free(as);
/*
* for flush_held_btree_writes() waiting on updates to flush or
@@ -747,8 +785,10 @@ free_update:
* */
mutex_unlock(&c->btree_interior_update_lock);
- while (nr_new_nodes) {
- struct btree *b = new_nodes[--nr_new_nodes];
+ /* Do btree writes after dropping journal res/locks: */
+ while (nr_nodes_need_write) {
+ b = nodes_need_write[--nr_nodes_need_write];
+
btree_node_lock_type(c, b, SIX_LOCK_read);
bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
six_unlock_read(&b->lock);
@@ -972,7 +1012,7 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
JOURNAL_RES_GET_NONBLOCK);
if (ret == -EAGAIN) {
if (flags & BTREE_INSERT_NOUNLOCK)
- return -EINTR;
+ return ERR_PTR(-EINTR);
bch2_trans_unlock(trans);
@@ -1001,6 +1041,7 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
as->btree_id = id;
as->reserve = reserve;
INIT_LIST_HEAD(&as->write_blocked_list);
+ INIT_LIST_HEAD(&as->unwritten_list);
as->journal_preres = journal_preres;
bch2_keylist_init(&as->parent_keys, as->inline_keys);
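
Two patterns recur in the btree_update_interior.c changes: taking intent and write locks together via trylock and backing out cleanly (six_trylock_intentwrite(), shown in full above), and getting the journal reservation with JOURNAL_RES_GET_NONBLOCK while locks are held, then dropping everything, waiting, and retrying from the top if that fails. A reduced sketch of the second pattern, with a pthread mutex and stub reservation helpers standing in for the bcachefs primitives:

/*
 * Model of the reservation retry in btree_update_nodes_written(): never
 * block waiting for journal space while holding locks the journal may
 * need to make progress; fail fast, drop the locks, wait, and retry.
 */
#include <errno.h>
#include <pthread.h>

static pthread_mutex_t interior_update_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned journal_space;		/* stand-in for journal state */

static int try_reserve(unsigned u64s)	/* ~ JOURNAL_RES_GET_NONBLOCK */
{
	return journal_space >= u64s ? 0 : -EAGAIN;
}

static void wait_for_space(unsigned u64s)	/* blocking wait, no locks held */
{
	journal_space = u64s;
}

static int reserve_journal_space(unsigned u64s)
{
	int ret;

	pthread_mutex_lock(&interior_update_lock);
again:
	ret = try_reserve(u64s);
	if (ret == -EAGAIN) {
		/* drop the lock before blocking, retake it, then retry: */
		pthread_mutex_unlock(&interior_update_lock);
		wait_for_space(u64s);
		pthread_mutex_lock(&interior_update_lock);
		goto again;
	}

	pthread_mutex_unlock(&interior_update_lock);
	return ret;
}
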
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index 9ef2cb9b9904..3d88719ba86c 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -10,9 +10,10 @@
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
#include <crypto/hash.h>
#include <crypto/poly1305.h>
+#include <crypto/skcipher.h>
#include <keys/user-type.h>
static u64 bch2_checksum_init(unsigned type)
@@ -67,21 +68,21 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t
}
}
-static inline void do_encrypt_sg(struct crypto_skcipher *tfm,
+static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm,
struct nonce nonce,
struct scatterlist *sg, size_t len)
{
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
int ret;
- skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_sync_tfm(req, tfm);
skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
ret = crypto_skcipher_encrypt(req);
BUG_ON(ret);
}
-static inline void do_encrypt(struct crypto_skcipher *tfm,
+static inline void do_encrypt(struct crypto_sync_skcipher *tfm,
struct nonce nonce,
void *buf, size_t len)
{
@@ -94,8 +95,8 @@ static inline void do_encrypt(struct crypto_skcipher *tfm,
int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
void *buf, size_t len)
{
- struct crypto_skcipher *chacha20 =
- crypto_alloc_skcipher("chacha20", 0, 0);
+ struct crypto_sync_skcipher *chacha20 =
+ crypto_alloc_sync_skcipher("chacha20", 0, 0);
int ret;
if (!chacha20) {
@@ -103,7 +104,8 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
return PTR_ERR(chacha20);
}
- ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key));
+ ret = crypto_skcipher_setkey(&chacha20->base,
+ (void *) key, sizeof(*key));
if (ret) {
pr_err("crypto_skcipher_setkey() error: %i", ret);
goto err;
@@ -111,7 +113,7 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
do_encrypt(chacha20, nonce, buf, len);
err:
- crypto_free_skcipher(chacha20);
+ crypto_free_sync_skcipher(chacha20);
return ret;
}
@@ -198,7 +200,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
kunmap_atomic(p);
}
#else
- __bio_for_each_contig_segment(bv, bio, *iter, *iter)
+ __bio_for_each_bvec(bv, bio, *iter, *iter)
crc = bch2_checksum_update(type, crc,
page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len);
@@ -223,7 +225,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
kunmap_atomic(p);
}
#else
- __bio_for_each_contig_segment(bv, bio, *iter, *iter)
+ __bio_for_each_bvec(bv, bio, *iter, *iter)
crypto_shash_update(desc,
page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len);
@@ -462,7 +464,7 @@ err:
static int bch2_alloc_ciphers(struct bch_fs *c)
{
if (!c->chacha20)
- c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0);
+ c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
if (IS_ERR(c->chacha20)) {
bch_err(c, "error requesting chacha20 module: %li",
PTR_ERR(c->chacha20));
@@ -545,7 +547,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
goto err;
}
- ret = crypto_skcipher_setkey(c->chacha20,
+ ret = crypto_skcipher_setkey(&c->chacha20->base,
(void *) &key.key, sizeof(key.key));
if (ret)
goto err;
@@ -573,7 +575,7 @@ void bch2_fs_encryption_exit(struct bch_fs *c)
if (!IS_ERR_OR_NULL(c->poly1305))
crypto_free_shash(c->poly1305);
if (!IS_ERR_OR_NULL(c->chacha20))
- crypto_free_skcipher(c->chacha20);
+ crypto_free_sync_skcipher(c->chacha20);
if (!IS_ERR_OR_NULL(c->sha256))
crypto_free_shash(c->sha256);
}
@@ -605,7 +607,7 @@ int bch2_fs_encryption_init(struct bch_fs *c)
if (ret)
goto out;
- ret = crypto_skcipher_setkey(c->chacha20,
+ ret = crypto_skcipher_setkey(&c->chacha20->base,
(void *) &key.key, sizeof(key.key));
if (ret)
goto out;
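
checksum.c is converted from the removed SKCIPHER_REQUEST_ON_STACK/crypto_skcipher interface to the sync-skcipher one: the cipher is allocated with crypto_alloc_sync_skcipher(), the key is set through &tfm->base, and requests are built with skcipher_request_set_sync_tfm(). Roughly the shape of do_encrypt()/bch2_chacha_encrypt_key() after the conversion, condensed into one function (error handling trimmed; the function name and parameters here are this sketch's own):

/* Condensed sync-skcipher usage, mirroring the converted code above: */
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int chacha20_crypt_buf(const void *key, unsigned key_len,
			      void *nonce, void *buf, size_t len)
{
	struct crypto_sync_skcipher *tfm;
	struct scatterlist sg;
	int ret;

	tfm = crypto_alloc_sync_skcipher("chacha20", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_skcipher_setkey(&tfm->base, key, key_len);
	if (!ret) {
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

		sg_init_one(&sg, buf, len);
		skcipher_request_set_sync_tfm(req, tfm);
		skcipher_request_set_crypt(req, &sg, &sg, len, nonce);
		ret = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}

	crypto_free_sync_skcipher(tfm);
	return ret;
}
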
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 833537cc8fd0..24dee8039d57 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -7,7 +7,7 @@
#include "super-io.h"
#include <linux/crc64.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
static inline bool bch2_checksum_mergeable(unsigned type)
{
@@ -138,9 +138,9 @@ static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
/* for skipping ahead and encrypting/decrypting at an offset: */
static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
{
- EBUG_ON(offset & (CHACHA20_BLOCK_SIZE - 1));
+ EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1));
- le32_add_cpu(&nonce.d[0], offset / CHACHA20_BLOCK_SIZE);
+ le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE);
return nonce;
}
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index 4a163b6d59da..c56938f21baa 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -39,6 +39,24 @@ static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
BUG();
}
+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ void *expected_start = NULL;
+
+ __bio_for_each_bvec(bv, bio, iter, start) {
+ if (expected_start &&
+ expected_start != page_address(bv.bv_page) + bv.bv_offset)
+ return false;
+
+ expected_start = page_address(bv.bv_page) +
+ bv.bv_offset + bv.bv_len;
+ }
+
+ return true;
+}
+
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
struct bvec_iter start, int rw)
{
@@ -48,27 +66,28 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
unsigned nr_pages = 0;
struct page *stack_pages[16];
struct page **pages = NULL;
- bool first = true;
- unsigned prev_end = PAGE_SIZE;
void *data;
BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
-#ifndef CONFIG_HIGHMEM
- __bio_for_each_contig_segment(bv, bio, iter, start) {
- if (bv.bv_len == start.bi_size)
- return (struct bbuf) {
- .b = page_address(bv.bv_page) + bv.bv_offset,
- .type = BB_NONE, .rw = rw
- };
- }
-#endif
+ if (!IS_ENABLED(CONFIG_HIGHMEM) &&
+ bio_phys_contig(bio, start))
+ return (struct bbuf) {
+ .b = page_address(bio_iter_page(bio, start)) +
+ bio_iter_offset(bio, start),
+ .type = BB_NONE, .rw = rw
+ };
+
+ /* check if we can map the pages contiguously: */
__bio_for_each_segment(bv, bio, iter, start) {
- if ((!first && bv.bv_offset) ||
- prev_end != PAGE_SIZE)
+ if (iter.bi_size != start.bi_size &&
+ bv.bv_offset)
+ goto bounce;
+
+ if (bv.bv_len < iter.bi_size &&
+ bv.bv_offset + bv.bv_len < PAGE_SIZE)
goto bounce;
- prev_end = bv.bv_offset + bv.bv_len;
nr_pages++;
}
@@ -172,20 +191,21 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
}
case BCH_COMPRESSION_TYPE_zstd: {
ZSTD_DCtx *ctx;
- size_t len;
+ size_t real_src_len = le32_to_cpup(src_data.b);
+
+ if (real_src_len > src_len - 4)
+ goto err;
workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
- src_len = le32_to_cpup(src_data.b);
-
- len = ZSTD_decompressDCtx(ctx,
+ ret = ZSTD_decompressDCtx(ctx,
dst_data, dst_len,
- src_data.b + 4, src_len);
+ src_data.b + 4, real_src_len);
mempool_free(workspace, &c->decompress_workspace);
- if (len != dst_len)
+ if (ret != dst_len)
goto err;
break;
}
@@ -264,7 +284,8 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
if (ret)
goto err;
- if (dst_data.type != BB_NONE)
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
bio_unmap_or_unbounce(c, dst_data);
@@ -407,7 +428,8 @@ static unsigned __bio_compress(struct bch_fs *c,
memset(dst_data.b + *dst_len, 0, pad);
*dst_len += pad;
- if (dst_data.type != BB_NONE)
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
@@ -512,7 +534,6 @@ void bch2_fs_compress_exit(struct bch_fs *c)
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t max_extent = c->sb.encoded_extent_max << 9;
- size_t order = get_order(max_extent);
size_t decompress_workspace_size = 0;
bool decompress_workspace_needed;
ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
@@ -547,14 +568,14 @@ have_compressed:
if (!mempool_initialized(&c->compression_bounce[READ])) {
ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
- 1, order);
+ 1, max_extent);
if (ret)
goto out;
}
if (!mempool_initialized(&c->compression_bounce[WRITE])) {
ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
- 1, order);
+ 1, max_extent);
if (ret)
goto out;
}
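
Besides adding bio_phys_contig() and sizing the bounce mempools in bytes rather than page order, the compress.c hunks validate the zstd length prefix before decompressing: the first 4 bytes of the encoded extent hold the real compressed size, which must not exceed the data that is actually present. A hedged sketch of that bounds check (the 4-byte little-endian header matches the diff; everything else is simplified):

/*
 * Simplified view of the zstd check added to __bio_uncompress(): the
 * on-disk format is [__le32 real_len][real_len bytes of zstd data], and
 * real_len must be validated before it is trusted.
 */
#include <stdint.h>
#include <string.h>

static int zstd_src_len(const void *src, size_t src_len, size_t *real_len)
{
	uint32_t le;

	if (src_len < sizeof(le))
		return -1;

	memcpy(&le, src, sizeof(le));	/* le32_to_cpup() in the kernel */
	*real_len = le;			/* assumes a little-endian host here */

	/* reject lengths that would read past the encoded extent: */
	if (*real_len > src_len - sizeof(le))
		return -1;

	return 0;
}
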
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index ab2d808eea43..7de61f7f4314 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -602,7 +602,7 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
EBUG_ON(!PageLocked(page));
EBUG_ON(!PageLocked(newpage));
- ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+ ret = migrate_page_move_mapping(mapping, newpage, page, 0);
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
@@ -627,10 +627,10 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
static void bch2_readpages_end_io(struct bio *bio)
{
+ struct bvec_iter_all iter;
struct bio_vec *bv;
- unsigned i;
- bio_for_each_segment_all(bv, bio, i) {
+ bio_for_each_segment_all(bv, bio, iter) {
struct page *page = bv->bv_page;
if (!bio->bi_status) {
@@ -782,11 +782,8 @@ static void readpage_bio_extend(struct readpages_iter *iter,
if (!get_more)
break;
- rcu_read_lock();
- page = radix_tree_lookup(&iter->mapping->i_pages, page_offset);
- rcu_read_unlock();
-
- if (page && !radix_tree_exceptional_entry(page))
+ page = xa_load(&iter->mapping->i_pages, page_offset);
+ if (page && !xa_is_value(page))
break;
page = __page_cache_alloc(readahead_gfp_mask(iter->mapping));
@@ -1037,32 +1034,33 @@ static void bch2_writepage_io_done(struct closure *cl)
struct bch_writepage_io, cl);
struct bch_fs *c = io->op.c;
struct bio *bio = &io->op.wbio.bio;
+ struct bvec_iter_all iter;
struct bio_vec *bvec;
- unsigned i, j;
+ unsigned i;
if (io->op.error) {
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter) {
struct bch_page_state *s;
SetPageError(bvec->bv_page);
- mapping_set_error(io->inode->v.i_mapping, -EIO);
+ mapping_set_error(bvec->bv_page->mapping, -EIO);
s = __bch2_page_state(bvec->bv_page);
spin_lock(&s->lock);
- for (j = 0; j < PAGE_SECTORS; j++)
- s->s[j].nr_replicas = 0;
+ for (i = 0; i < PAGE_SECTORS; i++)
+ s->s[i].nr_replicas = 0;
spin_unlock(&s->lock);
}
}
if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter) {
struct bch_page_state *s;
s = __bch2_page_state(bvec->bv_page);
spin_lock(&s->lock);
- for (j = 0; j < PAGE_SECTORS; j++)
- s->s[j].nr_replicas = 0;
+ for (i = 0; i < PAGE_SECTORS; i++)
+ s->s[i].nr_replicas = 0;
spin_unlock(&s->lock);
}
}
@@ -1086,7 +1084,7 @@ static void bch2_writepage_io_done(struct closure *cl)
*/
i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter) {
struct bch_page_state *s = __bch2_page_state(bvec->bv_page);
if (atomic_dec_and_test(&s->write_count))
@@ -1240,7 +1238,7 @@ do_io:
if (w->io &&
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
- bio_full(&w->io->op.wbio.bio) ||
+ bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
(BIO_MAX_PAGES * PAGE_SIZE) ||
bio_end_sector(&w->io->op.wbio.bio) != sector))
@@ -1806,8 +1804,9 @@ static long bch2_dio_write_loop(struct dio_write *dio)
struct address_space *mapping = req->ki_filp->f_mapping;
struct bch_inode_info *inode = file_bch_inode(req->ki_filp);
struct bio *bio = &dio->op.wbio.bio;
+ struct bvec_iter_all iter;
struct bio_vec *bv;
- unsigned i, unaligned;
+ unsigned unaligned;
u64 new_i_size;
bool sync = dio->sync;
long ret;
@@ -1849,7 +1848,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
* bio_iov_iter_get_pages was only able to get <
* blocksize worth of pages:
*/
- bio_for_each_segment_all(bv, bio, i)
+ bio_for_each_segment_all(bv, bio, iter)
put_page(bv->bv_page);
ret = -EFAULT;
goto err;
@@ -1897,7 +1896,7 @@ loop:
i_size_write(&inode->v, new_i_size);
spin_unlock(&inode->v.i_lock);
- bio_for_each_segment_all(bv, bio, i)
+ bio_for_each_segment_all(bv, bio, iter)
put_page(bv->bv_page);
if (!dio->iter.count || dio->op.error)
break;
@@ -2834,235 +2833,6 @@ static void mark_range_unallocated(struct bch_inode_info *inode,
} while (index <= end_index);
}
-static int generic_access_check_limits(struct file *file, loff_t pos,
- loff_t *count)
-{
- struct inode *inode = file->f_mapping->host;
- loff_t max_size = inode->i_sb->s_maxbytes;
-
- if (!(file->f_flags & O_LARGEFILE))
- max_size = MAX_NON_LFS;
-
- if (unlikely(pos >= max_size))
- return -EFBIG;
- *count = min(*count, max_size - pos);
- return 0;
-}
-
-static int generic_write_check_limits(struct file *file, loff_t pos,
- loff_t *count)
-{
- loff_t limit = rlimit(RLIMIT_FSIZE);
-
- if (limit != RLIM_INFINITY) {
- if (pos >= limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
- *count = min(*count, limit - pos);
- }
-
- return generic_access_check_limits(file, pos, count);
-}
-
-static int generic_remap_checks(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t *req_count, unsigned int remap_flags)
-{
- struct inode *inode_in = file_in->f_mapping->host;
- struct inode *inode_out = file_out->f_mapping->host;
- uint64_t count = *req_count;
- uint64_t bcount;
- loff_t size_in, size_out;
- loff_t bs = inode_out->i_sb->s_blocksize;
- int ret;
-
- /* The start of both ranges must be aligned to an fs block. */
- if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
- return -EINVAL;
-
- /* Ensure offsets don't wrap. */
- if (pos_in + count < pos_in || pos_out + count < pos_out)
- return -EINVAL;
-
- size_in = i_size_read(inode_in);
- size_out = i_size_read(inode_out);
-
- /* Dedupe requires both ranges to be within EOF. */
- if ((remap_flags & REMAP_FILE_DEDUP) &&
- (pos_in >= size_in || pos_in + count > size_in ||
- pos_out >= size_out || pos_out + count > size_out))
- return -EINVAL;
-
- /* Ensure the infile range is within the infile. */
- if (pos_in >= size_in)
- return -EINVAL;
- count = min(count, size_in - (uint64_t)pos_in);
-
- ret = generic_access_check_limits(file_in, pos_in, &count);
- if (ret)
- return ret;
-
- ret = generic_write_check_limits(file_out, pos_out, &count);
- if (ret)
- return ret;
-
- /*
- * If the user wanted us to link to the infile's EOF, round up to the
- * next block boundary for this check.
- *
- * Otherwise, make sure the count is also block-aligned, having
- * already confirmed the starting offsets' block alignment.
- */
- if (pos_in + count == size_in) {
- bcount = ALIGN(size_in, bs) - pos_in;
- } else {
- if (!IS_ALIGNED(count, bs))
- count = ALIGN_DOWN(count, bs);
- bcount = count;
- }
-
- /* Don't allow overlapped cloning within the same file. */
- if (inode_in == inode_out &&
- pos_out + bcount > pos_in &&
- pos_out < pos_in + bcount)
- return -EINVAL;
-
- /*
- * We shortened the request but the caller can't deal with that, so
- * bounce the request back to userspace.
- */
- if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
- return -EINVAL;
-
- *req_count = count;
- return 0;
-}
-
-static int generic_remap_check_len(struct inode *inode_in,
- struct inode *inode_out,
- loff_t pos_out,
- loff_t *len,
- unsigned int remap_flags)
-{
- u64 blkmask = i_blocksize(inode_in) - 1;
- loff_t new_len = *len;
-
- if ((*len & blkmask) == 0)
- return 0;
-
- if ((remap_flags & REMAP_FILE_DEDUP) ||
- pos_out + *len < i_size_read(inode_out))
- new_len &= ~blkmask;
-
- if (new_len == *len)
- return 0;
-
- if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
- *len = new_len;
- return 0;
- }
-
- return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
-}
-
-static int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t *len, unsigned int remap_flags)
-{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
- bool same_inode = (inode_in == inode_out);
- int ret;
-
- /* Don't touch certain kinds of inodes */
- if (IS_IMMUTABLE(inode_out))
- return -EPERM;
-
- if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
- return -ETXTBSY;
-
- /* Don't reflink dirs, pipes, sockets... */
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
- /* Zero length dedupe exits immediately; reflink goes to EOF. */
- if (*len == 0) {
- loff_t isize = i_size_read(inode_in);
-
- if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
- return 0;
- if (pos_in > isize)
- return -EINVAL;
- *len = isize - pos_in;
- if (*len == 0)
- return 0;
- }
-
- /* Check that we don't violate system file offset limits. */
- ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
- remap_flags);
- if (ret)
- return ret;
-
- /* Wait for the completion of any pending IOs on both files */
- inode_dio_wait(inode_in);
- if (!same_inode)
- inode_dio_wait(inode_out);
-
- ret = filemap_write_and_wait_range(inode_in->i_mapping,
- pos_in, pos_in + *len - 1);
- if (ret)
- return ret;
-
- ret = filemap_write_and_wait_range(inode_out->i_mapping,
- pos_out, pos_out + *len - 1);
- if (ret)
- return ret;
-
- /*
- * Check that the extents are the same.
- */
- if (remap_flags & REMAP_FILE_DEDUP) {
- bool is_same = false;
-
- ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
- inode_out, pos_out, *len, &is_same);
- if (ret)
- return ret;
- if (!is_same)
- return -EBADE;
- }
-
- ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
- remap_flags);
- if (ret)
- return ret;
-
- /* If can't alter the file contents, we're done. */
- if (!(remap_flags & REMAP_FILE_DEDUP)) {
- /* Update the timestamps, since we can alter file contents. */
- if (!(file_out->f_mode & FMODE_NOCMTIME)) {
- ret = file_update_time(file_out);
- if (ret)
- return ret;
- }
-
- /*
- * Clear the security bits if the process is not being run by
- * root. This keeps people from modifying setuid and setgid
- * binaries.
- */
- ret = file_remove_privs(file_out);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
struct file *file_dst, loff_t pos_dst,
loff_t len, unsigned remap_flags)
@@ -3255,7 +3025,7 @@ static loff_t page_hole_offset(struct address_space *mapping, loff_t offset)
loff_t ret = -1;
page = find_lock_entry(mapping, index);
- if (!page || radix_tree_exception(page))
+ if (!page || xa_is_value(page))
return offset;
pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1));
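
Most of the fs-io.c churn is mechanical: the large deletion drops local copies of the VFS remap helpers (generic_remap_file_range_prep() and friends) that newer kernels provide, bio_for_each_segment_all() now iterates with a struct bvec_iter_all instead of an integer index, and page-cache lookups use xa_load()/xa_is_value() instead of the old radix tree calls. The two converted loop shapes, in isolation:

/* The bvec_iter_all and XArray idioms used by the hunks above and below: */
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/xarray.h>

static void put_bio_pages(struct bio *bio)
{
	struct bvec_iter_all iter;
	struct bio_vec *bv;

	bio_for_each_segment_all(bv, bio, iter)
		put_page(bv->bv_page);
}

static bool page_is_cached(struct address_space *mapping, pgoff_t index)
{
	struct page *page = xa_load(&mapping->i_pages, index);

	/* value entries (shadow/exceptional) are not real pages: */
	return page && !xa_is_value(page);
}
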
diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
index 1b593ea707d5..7063556d289b 100644
--- a/fs/bcachefs/fs-io.h
+++ b/fs/bcachefs/fs-io.h
@@ -35,10 +35,6 @@ int bch2_fsync(struct file *, loff_t, loff_t, int);
int bch2_truncate(struct bch_inode_info *, struct iattr *);
long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
-#define REMAP_FILE_ADVISORY (0)
-#define REMAP_FILE_DEDUP (1 << 0)
-#define REMAP_FILE_CAN_SHORTEN (1 << 1)
-
loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
loff_t, loff_t, unsigned);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index b1445bdc3e9d..1c89a1b2c2d0 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -966,15 +966,6 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
return bch2_readdir(c, inode->v.i_ino, ctx);
}
-static int bch2_clone_file_range(struct file *file_src, loff_t pos_src,
- struct file *file_dst, loff_t pos_dst,
- u64 len)
-{
- return bch2_remap_file_range(file_src, pos_src,
- file_dst, pos_dst,
- len, 0);
-}
-
static const struct file_operations bch_file_operations = {
.llseek = bch2_llseek,
.read_iter = bch2_read_iter,
@@ -992,7 +983,7 @@ static const struct file_operations bch_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = bch2_compat_fs_ioctl,
#endif
- .clone_file_range = bch2_clone_file_range,
+ .remap_file_range = bch2_remap_file_range,
};
static const struct inode_operations bch_file_inode_operations = {
@@ -1523,7 +1514,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
sb->s_bdi->congested_fn = bch2_congested;
sb->s_bdi->congested_data = c;
- sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+ sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
for_each_online_member(ca, c, i) {
struct block_device *bdev = ca->disk_sb.bdev;
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index ff3dd31c134c..5801a036391e 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -124,10 +124,10 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{
+ struct bvec_iter_all iter;
struct bio_vec *bv;
- unsigned i;
- bio_for_each_segment_all(bv, bio, i)
+ bio_for_each_segment_all(bv, bio, iter)
if (bv->bv_page != ZERO_PAGE(0))
mempool_free(bv->bv_page, &c->bio_bounce_pages);
bio->bi_vcnt = 0;
@@ -588,7 +588,9 @@ static void bch2_write_index(struct closure *cl)
__bch2_write_index(op);
- if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
+ if (!(op->flags & BCH_WRITE_DONE)) {
+ continue_at(cl, __bch2_write, index_update_wq(op));
+ } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
bch2_journal_flush_seq_async(&c->journal,
*op_journal_seq(op),
cl);
@@ -1103,8 +1105,15 @@ again:
if (ret < 0)
goto err;
- if (ret)
+ if (ret) {
skip_put = false;
+ } else {
+ /*
+ * for the skip_put optimization this has to be set
+ * before we submit the bio:
+ */
+ op->flags |= BCH_WRITE_DONE;
+ }
bio->bi_end_io = bch2_write_endio;
bio->bi_private = &op->cl;
@@ -1127,16 +1136,30 @@ again:
return;
err:
op->error = ret;
+ op->flags |= BCH_WRITE_DONE;
continue_at(cl, bch2_write_index, index_update_wq(op));
return;
flush_io:
+ /*
+ * If the write can't all be submitted at once, we generally want to
+ * block synchronously as that signals backpressure to the caller.
+ *
+ * However, if we're running out of a workqueue, we can't block here
+ * because we'll be blocking other work items from completing:
+ */
+ if (current->flags & PF_WQ_WORKER) {
+ continue_at(cl, bch2_write_index, index_update_wq(op));
+ return;
+ }
+
closure_sync(cl);
if (!bch2_keylist_empty(&op->insert_keys)) {
__bch2_write_index(op);
if (op->error) {
+ op->flags |= BCH_WRITE_DONE;
continue_at_nobarrier(cl, bch2_write_done, NULL);
return;
}
@@ -1182,6 +1205,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
bch2_keylist_push(&op->insert_keys);
op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+ op->flags |= BCH_WRITE_DONE;
+
continue_at_nobarrier(cl, bch2_write_index, NULL);
return;
err:
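
The flush_io change above encodes a general rule: code running as a work item (PF_WQ_WORKER set in current->flags) must not block waiting for its own I/O, because that stalls every other item on the queue; it defers via continue_at() instead, while ordinary process context still blocks to provide backpressure. A generic sketch of that decision, using plain workqueue primitives rather than bcachefs closures (the function and its callbacks are this sketch's own):

/*
 * Block synchronously for backpressure when called from process context,
 * but defer and return when running out of a workqueue.
 */
#include <linux/sched.h>
#include <linux/workqueue.h>

static void wait_or_defer(struct work_struct *deferred_work,
			  struct workqueue_struct *wq,
			  void (*wait_for_io)(void))
{
	if (current->flags & PF_WQ_WORKER) {
		/* running as a work item: defer instead of blocking */
		queue_work(wq, deferred_work);
		return;
	}

	/* normal process context: block to signal backpressure */
	wait_for_io();
}
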
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index e45dcf9635ae..c4c847306345 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -36,6 +36,7 @@ enum bch_write_flags {
/* Internal: */
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 10),
BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 11),
+ BCH_WRITE_DONE = (1 << 12),
};
static inline u64 *op_journal_seq(struct bch_write_op *op)
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 1ba8b62b9f2b..78f5fac06bf5 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -269,7 +269,7 @@ static inline void bch2_journal_res_put(struct journal *j,
if (!res->ref)
return;
- lock_release(&j->res_map, 0, _THIS_IP_);
+ lock_release(&j->res_map, _THIS_IP_);
while (res->u64s)
bch2_journal_add_entry(j, res,
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index b82cd23fc848..67e495bc8aba 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -313,12 +313,12 @@ static void move_free(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct moving_context *ctxt = io->write.ctxt;
+ struct bvec_iter_all iter;
struct bio_vec *bv;
- unsigned i;
bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
- bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i)
+ bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
if (bv->bv_page)
__free_page(bv->bv_page);
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index e7787c5063ce..d3032a46e7f3 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -710,25 +710,15 @@ found:
return ret;
}
-static int bch2_set_quota(struct super_block *sb, struct kqid qid,
- struct qc_dqblk *qdq)
+static int bch2_set_quota_trans(struct btree_trans *trans,
+ struct bkey_i_quota *new_quota,
+ struct qc_dqblk *qdq)
{
- struct bch_fs *c = sb->s_fs_info;
- struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- struct bkey_i_quota new_quota;
int ret;
- if (sb->s_flags & SB_RDONLY)
- return -EROFS;
-
- bkey_quota_init(&new_quota.k_i);
- new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
-
- bch2_trans_init(&trans, c, 0, 0);
-
- iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p,
+ iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
@@ -736,32 +726,43 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
if (unlikely(ret))
return ret;
- switch (k.k->type) {
- case KEY_TYPE_quota:
- new_quota.v = *bkey_s_c_to_quota(k).v;
- break;
- }
+ if (k.k->type == KEY_TYPE_quota)
+ new_quota->v = *bkey_s_c_to_quota(k).v;
if (qdq->d_fieldmask & QC_SPC_SOFT)
- new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
if (qdq->d_fieldmask & QC_SPC_HARD)
- new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
if (qdq->d_fieldmask & QC_INO_SOFT)
- new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
if (qdq->d_fieldmask & QC_INO_HARD)
- new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+
+ return bch2_trans_update(trans, iter, &new_quota->k_i, 0);
+}
- bch2_trans_update(&trans, iter, &new_quota.k_i, 0);
+static int bch2_set_quota(struct super_block *sb, struct kqid qid,
+ struct qc_dqblk *qdq)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct btree_trans trans;
+ struct bkey_i_quota new_quota;
+ int ret;
- ret = bch2_trans_commit(&trans, NULL, NULL, 0);
+ if (sb->s_flags & SB_RDONLY)
+ return -EROFS;
- bch2_trans_exit(&trans);
+ bkey_quota_init(&new_quota.k_i);
+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
- if (ret)
- return ret;
+ bch2_trans_init(&trans, c, 0, 0);
- ret = __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
+ ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK,
+ bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
+
+ bch2_trans_exit(&trans);
return ret;
}
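
This is the "Fix setquota" part of the merge: the btree update is factored into bch2_set_quota_trans() and driven through bch2_trans_do() with BTREE_INSERT_NOUNLOCK, so the whole lookup-modify-update sequence can be replayed when the commit has to restart rather than being committed by hand once. A sketch of that commit-and-retry shape; do_in_trans(), update(), and commit() below are stand-ins, not the real bcachefs interfaces:

/*
 * The update is expressed as a function of the transaction so it can be
 * re-run in full on transaction restart (-EINTR).
 */
#include <errno.h>

struct trans;

static int do_in_trans(struct trans *trans,
		       int (*update)(struct trans *, void *), void *arg,
		       int (*commit)(struct trans *))
{
	int ret;

	do {
		ret = update(trans, arg) ?: commit(trans);
	} while (ret == -EINTR);	/* transaction restart: replay the update */

	return ret;
}
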
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index d2c275ce79ab..d347389771e0 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -889,6 +889,8 @@ int bch2_fs_start(struct bch_fs *c)
if (bch2_fs_init_fault("fs_start"))
goto err;
+ set_bit(BCH_FS_STARTED, &c->flags);
+
if (c->opts.read_only || c->opts.nochanges) {
bch2_fs_read_only(c);
} else {
@@ -900,7 +902,6 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
}
- set_bit(BCH_FS_STARTED, &c->flags);
print_mount_opts(c);
ret = 0;
out:
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2b19a0038045..0128daba5970 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -664,35 +664,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
memset(s + bytes, c, rem);
}
-static inline struct bio_vec next_contig_bvec(struct bio *bio,
- struct bvec_iter *iter)
-{
- struct bio_vec bv = bio_iter_iovec(bio, *iter);
-
- bio_advance_iter(bio, iter, bv.bv_len);
-#ifndef CONFIG_HIGHMEM
- while (iter->bi_size) {
- struct bio_vec next = bio_iter_iovec(bio, *iter);
-
- if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len !=
- page_address(next.bv_page) + next.bv_offset)
- break;
-
- bv.bv_len += next.bv_len;
- bio_advance_iter(bio, iter, next.bv_len);
- }
-#endif
- return bv;
-}
-
-#define __bio_for_each_contig_segment(bv, bio, iter, start) \
- for (iter = (start); \
- (iter).bi_size && \
- ((bv = next_contig_bvec((bio), &(iter))), 1);)
-
-#define bio_for_each_contig_segment(bv, bio, iter) \
- __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
-
void sort_cmp_size(void *base, size_t num, size_t size,
int (*cmp_func)(const void *, const void *, size_t),
void (*swap_func)(void *, void *, size_t));
diff --git a/kernel/locking/six.c b/kernel/locking/six.c
index 9fa58b6fadc9..c778123505d0 100644
--- a/kernel/locking/six.c
+++ b/kernel/locking/six.c
@@ -15,7 +15,7 @@
#endif
#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
-#define six_release(l) lock_release(l, 0, _RET_IP_)
+#define six_release(l) lock_release(l, _RET_IP_)
struct six_lock_vals {
/* Value we add to the lock in order to take the lock: */