diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-06-30 16:35:37 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2020-12-10 18:28:29 -0500 |
commit | 8276985a0d089cdff050641421e47776d7b34142 (patch) | |
tree | f2f882c9fe56ba4e27872a021a604c6c5b838725 | |
parent | 73f27dc83d8bd75a25b5d60f8806b0c36d3d8ed2 (diff) |
bcachefs: Fixes for 4.19
-rw-r--r-- | fs/bcachefs/bcachefs.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/checksum.c | 31 | ||||
-rw-r--r-- | fs/bcachefs/checksum.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/compress.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/fs-io.c | 398 | ||||
-rw-r--r-- | fs/bcachefs/fs-io.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 21 | ||||
-rw-r--r-- | fs/bcachefs/io.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/journal.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/journal_reclaim.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 38 | ||||
-rw-r--r-- | fs/bcachefs/super.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/util.h | 31 |
16 files changed, 455 insertions, 98 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index eb5b40804773..d3d67cc71faa 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -759,7 +759,7 @@ struct bch_fs { ZSTD_parameters zstd_params; struct crypto_shash *sha256; - struct crypto_sync_skcipher *chacha20; + struct crypto_skcipher *chacha20; struct crypto_shash *poly1305; atomic64_t key_version; diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 09774f56f11c..1dc1f7460f9a 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -81,7 +81,8 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) if (!b->data) return -ENOMEM; - b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp); + b->aux_data = __vmalloc(btree_aux_data_bytes(b), gfp, + PAGE_KERNEL_EXEC); if (!b->aux_data) { kvpfree(b->data, btree_bytes(c)); b->data = NULL; diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 1a4b11e99cc4..6de92263af6c 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -121,7 +121,7 @@ static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offse bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags, bytes); - nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE)); + nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE)); } bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 3d88719ba86c..a01073e54a33 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -10,7 +10,7 @@ #include <linux/random.h> #include <linux/scatterlist.h> #include <crypto/algapi.h> -#include <crypto/chacha.h> +#include <crypto/chacha20.h> #include <crypto/hash.h> #include <crypto/poly1305.h> #include <crypto/skcipher.h> @@ -68,21 +68,21 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t } } -static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm, +static inline void do_encrypt_sg(struct crypto_skcipher *tfm, struct nonce nonce, struct scatterlist *sg, size_t len) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); + SKCIPHER_REQUEST_ON_STACK(req, tfm); int ret; - skcipher_request_set_sync_tfm(req, tfm); + skcipher_request_set_tfm(req, tfm); skcipher_request_set_crypt(req, sg, sg, len, nonce.d); ret = crypto_skcipher_encrypt(req); BUG_ON(ret); } -static inline void do_encrypt(struct crypto_sync_skcipher *tfm, +static inline void do_encrypt(struct crypto_skcipher *tfm, struct nonce nonce, void *buf, size_t len) { @@ -95,8 +95,8 @@ static inline void do_encrypt(struct crypto_sync_skcipher *tfm, int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, void *buf, size_t len) { - struct crypto_sync_skcipher *chacha20 = - crypto_alloc_sync_skcipher("chacha20", 0, 0); + struct crypto_skcipher *chacha20 = + crypto_alloc_skcipher("chacha20", 0, 0); int ret; if (!chacha20) { @@ -104,8 +104,7 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, return PTR_ERR(chacha20); } - ret = crypto_skcipher_setkey(&chacha20->base, - (void *) key, sizeof(*key)); + ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key)); if (ret) { pr_err("crypto_skcipher_setkey() error: %i", ret); goto err; @@ -113,7 +112,7 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, do_encrypt(chacha20, nonce, buf, len); err: - crypto_free_sync_skcipher(chacha20); + crypto_free_skcipher(chacha20); return ret; } @@ -200,7 +199,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, kunmap_atomic(p); } #else - __bio_for_each_bvec(bv, bio, *iter, *iter) + __bio_for_each_contig_segment(bv, bio, *iter, *iter) crc = bch2_checksum_update(type, crc, page_address(bv.bv_page) + bv.bv_offset, bv.bv_len); @@ -225,7 +224,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, kunmap_atomic(p); } #else - __bio_for_each_bvec(bv, bio, *iter, *iter) + __bio_for_each_contig_segment(bv, bio, *iter, *iter) crypto_shash_update(desc, page_address(bv.bv_page) + bv.bv_offset, bv.bv_len); @@ -464,7 +463,7 @@ err: static int bch2_alloc_ciphers(struct bch_fs *c) { if (!c->chacha20) - c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); + c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0); if (IS_ERR(c->chacha20)) { bch_err(c, "error requesting chacha20 module: %li", PTR_ERR(c->chacha20)); @@ -547,7 +546,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) goto err; } - ret = crypto_skcipher_setkey(&c->chacha20->base, + ret = crypto_skcipher_setkey(c->chacha20, (void *) &key.key, sizeof(key.key)); if (ret) goto err; @@ -575,7 +574,7 @@ void bch2_fs_encryption_exit(struct bch_fs *c) if (!IS_ERR_OR_NULL(c->poly1305)) crypto_free_shash(c->poly1305); if (!IS_ERR_OR_NULL(c->chacha20)) - crypto_free_sync_skcipher(c->chacha20); + crypto_free_skcipher(c->chacha20); if (!IS_ERR_OR_NULL(c->sha256)) crypto_free_shash(c->sha256); } @@ -607,7 +606,7 @@ int bch2_fs_encryption_init(struct bch_fs *c) if (ret) goto out; - ret = crypto_skcipher_setkey(&c->chacha20->base, + ret = crypto_skcipher_setkey(c->chacha20, (void *) &key.key, sizeof(key.key)); if (ret) goto out; diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 24dee8039d57..833537cc8fd0 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -7,7 +7,7 @@ #include "super-io.h" #include <linux/crc64.h> -#include <crypto/chacha.h> +#include <crypto/chacha20.h> static inline bool bch2_checksum_mergeable(unsigned type) { @@ -138,9 +138,9 @@ static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r) /* for skipping ahead and encrypting/decrypting at an offset: */ static inline struct nonce nonce_add(struct nonce nonce, unsigned offset) { - EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1)); + EBUG_ON(offset & (CHACHA20_BLOCK_SIZE - 1)); - le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE); + le32_add_cpu(&nonce.d[0], offset / CHACHA20_BLOCK_SIZE); return nonce; } diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index aebf46bb1d21..0d68a277cfd7 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -45,7 +45,7 @@ static bool bio_phys_contig(struct bio *bio, struct bvec_iter start) struct bvec_iter iter; void *expected_start = NULL; - __bio_for_each_bvec(bv, bio, iter, start) { + __bio_for_each_segment(bv, bio, iter, start) { if (expected_start && expected_start != page_address(bv.bv_page) + bv.bv_offset) return false; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index dc16a7731e38..93ad6dbe8c16 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -87,24 +87,6 @@ struct dio_read { struct bch_read_bio rbio; }; -/* stub version */ -static int add_to_page_cache_lru_vec(struct address_space *mapping, - struct page **pages, - unsigned nr_pages, - pgoff_t offset, gfp_t gfp_mask) -{ - int i, err = 0; - - for (i = 0; i < nr_pages; i++) { - err = add_to_page_cache_lru(pages[i], mapping, - offset + i, gfp_mask); - if (err) - break; - } - - return i ?: err; -} - /* pagecache_block must be held */ static int write_invalidate_inode_pages_range(struct address_space *mapping, loff_t start, loff_t end) @@ -299,13 +281,28 @@ static inline struct bch_page_state *bch2_page_state(struct page *page) /* for newly allocated pages: */ static void __bch2_page_state_release(struct page *page) { - kfree(detach_page_private(page)); + struct bch_page_state *s = __bch2_page_state(page); + + if (!s) + return; + + ClearPagePrivate(page); + set_page_private(page, 0); + put_page(page); + kfree(s); } static void bch2_page_state_release(struct page *page) { - EBUG_ON(!PageLocked(page)); - __bch2_page_state_release(page); + struct bch_page_state *s = bch2_page_state(page); + + if (!s) + return; + + ClearPagePrivate(page); + set_page_private(page, 0); + put_page(page); + kfree(s); } /* for newly allocated pages: */ @@ -319,7 +316,13 @@ static struct bch_page_state *__bch2_page_state_create(struct page *page, return NULL; spin_lock_init(&s->lock); - attach_page_private(page, s); + /* + * migrate_page_move_mapping() assumes that pages with private data + * have their count elevated by 1. + */ + get_page(page); + set_page_private(page, (unsigned long) s); + SetPagePrivate(page); return s; } @@ -642,12 +645,18 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage, EBUG_ON(!PageLocked(page)); EBUG_ON(!PageLocked(newpage)); - ret = migrate_page_move_mapping(mapping, newpage, page, 0); + ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); if (ret != MIGRATEPAGE_SUCCESS) return ret; - if (PagePrivate(page)) - attach_page_private(newpage, detach_page_private(page)); + if (PagePrivate(page)) { + ClearPagePrivate(page); + get_page(newpage); + set_page_private(newpage, page_private(page)); + set_page_private(page, 0); + put_page(page); + SetPagePrivate(newpage); + } if (mode != MIGRATE_SYNC_NO_COPY) migrate_page_copy(newpage, page); @@ -661,10 +670,10 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage, static void bch2_readpages_end_io(struct bio *bio) { - struct bvec_iter_all iter; struct bio_vec *bv; + unsigned i; - bio_for_each_segment_all(bv, bio, iter) { + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; if (!bio->bi_status) { @@ -683,29 +692,31 @@ struct readpages_iter { struct address_space *mapping; struct page **pages; unsigned nr_pages; + unsigned nr_added; unsigned idx; pgoff_t offset; }; static int readpages_iter_init(struct readpages_iter *iter, - struct readahead_control *ractl) + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) { - unsigned i, nr_pages = readahead_count(ractl); - memset(iter, 0, sizeof(*iter)); - iter->mapping = ractl->mapping; - iter->offset = readahead_index(ractl); - iter->nr_pages = nr_pages; + iter->mapping = mapping; + iter->offset = list_last_entry(pages, struct page, lru)->index; iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); if (!iter->pages) return -ENOMEM; - nr_pages = __readahead_batch(ractl, iter->pages, nr_pages); - for (i = 0; i < nr_pages; i++) { - __bch2_page_state_create(iter->pages[i], __GFP_NOFAIL); - put_page(iter->pages[i]); + while (!list_empty(pages)) { + struct page *page = list_last_entry(pages, struct page, lru); + + __bch2_page_state_create(page, __GFP_NOFAIL); + + iter->pages[iter->nr_pages++] = page; + list_del(&page->lru); } return 0; @@ -713,9 +724,41 @@ static int readpages_iter_init(struct readpages_iter *iter, static inline struct page *readpage_iter_next(struct readpages_iter *iter) { - if (iter->idx >= iter->nr_pages) - return NULL; + struct page *page; + unsigned i; + int ret; + + BUG_ON(iter->idx > iter->nr_added); + BUG_ON(iter->nr_added > iter->nr_pages); + + if (iter->idx < iter->nr_added) + goto out; + + while (1) { + if (iter->idx == iter->nr_pages) + return NULL; + + ret = add_to_page_cache_lru_vec(iter->mapping, + iter->pages + iter->nr_added, + iter->nr_pages - iter->nr_added, + iter->offset + iter->nr_added, + GFP_NOFS); + if (ret > 0) + break; + + page = iter->pages[iter->nr_added]; + iter->idx++; + iter->nr_added++; + + __bch2_page_state_release(page); + put_page(page); + } + iter->nr_added += ret; + + for (i = iter->idx; i < iter->nr_added; i++) + put_page(iter->pages[i]); +out: EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); return iter->pages[iter->idx]; @@ -776,8 +819,11 @@ static void readpage_bio_extend(struct readpages_iter *iter, if (!get_more) break; - page = xa_load(&iter->mapping->i_pages, page_offset); - if (page && !xa_is_value(page)) + rcu_read_lock(); + page = radix_tree_lookup(&iter->mapping->i_pages, page_offset); + rcu_read_unlock(); + + if (page && !radix_tree_exceptional_entry(page)) break; page = __page_cache_alloc(readahead_gfp_mask(iter->mapping)); @@ -883,9 +929,10 @@ retry: bkey_on_stack_exit(&sk, c); } -void bch2_readahead(struct readahead_control *ractl) +int bch2_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) { - struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); + struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts = io_opts(c, &inode->ei_inode); struct btree_trans trans; @@ -894,7 +941,7 @@ void bch2_readahead(struct readahead_control *ractl) struct readpages_iter readpages_iter; int ret; - ret = readpages_iter_init(&readpages_iter, ractl); + ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages); BUG_ON(ret); bch2_trans_init(&trans, c, 0, 0); @@ -929,6 +976,8 @@ void bch2_readahead(struct readahead_control *ractl) bch2_trans_exit(&trans); kfree(readpages_iter.pages); + + return 0; } static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, @@ -1028,35 +1077,34 @@ static void bch2_writepage_io_done(struct closure *cl) struct bch_writepage_io, cl); struct bch_fs *c = io->op.c; struct bio *bio = &io->op.wbio.bio; - struct bvec_iter_all iter; struct bio_vec *bvec; - unsigned i; + unsigned i, j; if (io->op.error) { set_bit(EI_INODE_ERROR, &io->inode->ei_flags); - bio_for_each_segment_all(bvec, bio, iter) { + bio_for_each_segment_all(bvec, bio, i) { struct bch_page_state *s; SetPageError(bvec->bv_page); - mapping_set_error(bvec->bv_page->mapping, -EIO); + mapping_set_error(io->inode->v.i_mapping, -EIO); s = __bch2_page_state(bvec->bv_page); spin_lock(&s->lock); - for (i = 0; i < PAGE_SECTORS; i++) - s->s[i].nr_replicas = 0; + for (j = 0; j < PAGE_SECTORS; j++) + s->s[j].nr_replicas = 0; spin_unlock(&s->lock); } } if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { - bio_for_each_segment_all(bvec, bio, iter) { + bio_for_each_segment_all(bvec, bio, i) { struct bch_page_state *s; s = __bch2_page_state(bvec->bv_page); spin_lock(&s->lock); - for (i = 0; i < PAGE_SECTORS; i++) - s->s[i].nr_replicas = 0; + for (j = 0; j < PAGE_SECTORS; j++) + s->s[j].nr_replicas = 0; spin_unlock(&s->lock); } } @@ -1080,7 +1128,7 @@ static void bch2_writepage_io_done(struct closure *cl) */ i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); - bio_for_each_segment_all(bvec, bio, iter) { + bio_for_each_segment_all(bvec, bio, i) { struct bch_page_state *s = __bch2_page_state(bvec->bv_page); if (atomic_dec_and_test(&s->write_count)) @@ -1234,7 +1282,7 @@ do_io: if (w->io && (w->io->op.res.nr_replicas != nr_replicas_this_write || - bio_full(&w->io->op.wbio.bio, PAGE_SIZE) || + bio_full(&w->io->op.wbio.bio) || w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= (BIO_MAX_PAGES * PAGE_SIZE) || bio_end_sector(&w->io->op.wbio.bio) != sector)) @@ -1803,9 +1851,8 @@ static long bch2_dio_write_loop(struct dio_write *dio) struct bch_inode_info *inode = file_bch_inode(req->ki_filp); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bio *bio = &dio->op.wbio.bio; - struct bvec_iter_all iter; struct bio_vec *bv; - unsigned unaligned, iter_count; + unsigned i, unaligned, iter_count; bool sync = dio->sync, dropped_locks; long ret; @@ -1816,7 +1863,7 @@ static long bch2_dio_write_loop(struct dio_write *dio) iter_count = dio->iter.count; if (kthread) - kthread_use_mm(dio->mm); + use_mm(dio->mm); BUG_ON(current->faults_disabled_mapping); current->faults_disabled_mapping = mapping; @@ -1826,7 +1873,7 @@ static long bch2_dio_write_loop(struct dio_write *dio) current->faults_disabled_mapping = NULL; if (kthread) - kthread_unuse_mm(dio->mm); + unuse_mm(dio->mm); /* * If the fault handler returned an error but also signalled @@ -1859,7 +1906,7 @@ static long bch2_dio_write_loop(struct dio_write *dio) * bio_iov_iter_get_pages was only able to get < * blocksize worth of pages: */ - bio_for_each_segment_all(bv, bio, iter) + bio_for_each_segment_all(bv, bio, i) put_page(bv->bv_page); ret = -EFAULT; goto err; @@ -1922,7 +1969,7 @@ loop: i_size_write(&inode->v, req->ki_pos); spin_unlock(&inode->v.i_lock); - bio_for_each_segment_all(bv, bio, iter) + bio_for_each_segment_all(bv, bio, i) put_page(bv->bv_page); if (dio->op.error) { @@ -2843,6 +2890,235 @@ static void mark_range_unallocated(struct bch_inode_info *inode, } while (index <= end_index); } +static int generic_access_check_limits(struct file *file, loff_t pos, + loff_t *count) +{ + struct inode *inode = file->f_mapping->host; + loff_t max_size = inode->i_sb->s_maxbytes; + + if (!(file->f_flags & O_LARGEFILE)) + max_size = MAX_NON_LFS; + + if (unlikely(pos >= max_size)) + return -EFBIG; + *count = min(*count, max_size - pos); + return 0; +} + +static int generic_write_check_limits(struct file *file, loff_t pos, + loff_t *count) +{ + loff_t limit = rlimit(RLIMIT_FSIZE); + + if (limit != RLIM_INFINITY) { + if (pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + *count = min(*count, limit - pos); + } + + return generic_access_check_limits(file, pos, count); +} + +static int generic_remap_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *req_count, unsigned int remap_flags) +{ + struct inode *inode_in = file_in->f_mapping->host; + struct inode *inode_out = file_out->f_mapping->host; + uint64_t count = *req_count; + uint64_t bcount; + loff_t size_in, size_out; + loff_t bs = inode_out->i_sb->s_blocksize; + int ret; + + /* The start of both ranges must be aligned to an fs block. */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs)) + return -EINVAL; + + /* Ensure offsets don't wrap. */ + if (pos_in + count < pos_in || pos_out + count < pos_out) + return -EINVAL; + + size_in = i_size_read(inode_in); + size_out = i_size_read(inode_out); + + /* Dedupe requires both ranges to be within EOF. */ + if ((remap_flags & REMAP_FILE_DEDUP) && + (pos_in >= size_in || pos_in + count > size_in || + pos_out >= size_out || pos_out + count > size_out)) + return -EINVAL; + + /* Ensure the infile range is within the infile. */ + if (pos_in >= size_in) + return -EINVAL; + count = min(count, size_in - (uint64_t)pos_in); + + ret = generic_access_check_limits(file_in, pos_in, &count); + if (ret) + return ret; + + ret = generic_write_check_limits(file_out, pos_out, &count); + if (ret) + return ret; + + /* + * If the user wanted us to link to the infile's EOF, round up to the + * next block boundary for this check. + * + * Otherwise, make sure the count is also block-aligned, having + * already confirmed the starting offsets' block alignment. + */ + if (pos_in + count == size_in) { + bcount = ALIGN(size_in, bs) - pos_in; + } else { + if (!IS_ALIGNED(count, bs)) + count = ALIGN_DOWN(count, bs); + bcount = count; + } + + /* Don't allow overlapped cloning within the same file. */ + if (inode_in == inode_out && + pos_out + bcount > pos_in && + pos_out < pos_in + bcount) + return -EINVAL; + + /* + * We shortened the request but the caller can't deal with that, so + * bounce the request back to userspace. + */ + if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) + return -EINVAL; + + *req_count = count; + return 0; +} + +static int generic_remap_check_len(struct inode *inode_in, + struct inode *inode_out, + loff_t pos_out, + loff_t *len, + unsigned int remap_flags) +{ + u64 blkmask = i_blocksize(inode_in) - 1; + loff_t new_len = *len; + + if ((*len & blkmask) == 0) + return 0; + + if ((remap_flags & REMAP_FILE_DEDUP) || + pos_out + *len < i_size_read(inode_out)) + new_len &= ~blkmask; + + if (new_len == *len) + return 0; + + if (remap_flags & REMAP_FILE_CAN_SHORTEN) { + *len = new_len; + return 0; + } + + return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; +} + +static int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + bool same_inode = (inode_in == inode_out); + int ret; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; + + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + return -ETXTBSY; + + /* Don't reflink dirs, pipes, sockets... */ + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + /* Zero length dedupe exits immediately; reflink goes to EOF. */ + if (*len == 0) { + loff_t isize = i_size_read(inode_in); + + if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize) + return 0; + if (pos_in > isize) + return -EINVAL; + *len = isize - pos_in; + if (*len == 0) + return 0; + } + + /* Check that we don't violate system file offset limits. */ + ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, + remap_flags); + if (ret) + return ret; + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode_in); + if (!same_inode) + inode_dio_wait(inode_out); + + ret = filemap_write_and_wait_range(inode_in->i_mapping, + pos_in, pos_in + *len - 1); + if (ret) + return ret; + + ret = filemap_write_and_wait_range(inode_out->i_mapping, + pos_out, pos_out + *len - 1); + if (ret) + return ret; + + /* + * Check that the extents are the same. + */ + if (remap_flags & REMAP_FILE_DEDUP) { + bool is_same = false; + + ret = vfs_dedupe_file_range_compare(inode_in, pos_in, + inode_out, pos_out, *len, &is_same); + if (ret) + return ret; + if (!is_same) + return -EBADE; + } + + ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, + remap_flags); + if (ret) + return ret; + + /* If can't alter the file contents, we're done. */ + if (!(remap_flags & REMAP_FILE_DEDUP)) { + /* Update the timestamps, since we can alter file contents. */ + if (!(file_out->f_mode & FMODE_NOCMTIME)) { + ret = file_update_time(file_out); + if (ret) + return ret; + } + + /* + * Clear the security bits if the process is not being run by + * root. This keeps people from modifying setuid and setgid + * binaries. + */ + ret = file_remove_privs(file_out); + if (ret) + return ret; + } + + return 0; +} + loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, struct file *file_dst, loff_t pos_dst, loff_t len, unsigned remap_flags) diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index 2537a3d25ede..1b593ea707d5 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -19,7 +19,8 @@ int bch2_writepage(struct page *, struct writeback_control *); int bch2_readpage(struct file *, struct page *); int bch2_writepages(struct address_space *, struct writeback_control *); -void bch2_readahead(struct readahead_control *); +int bch2_readpages(struct file *, struct address_space *, + struct list_head *, unsigned); int bch2_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **); @@ -34,6 +35,10 @@ int bch2_fsync(struct file *, loff_t, loff_t, int); int bch2_truncate(struct bch_inode_info *, struct iattr *); long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); +#define REMAP_FILE_ADVISORY (0) +#define REMAP_FILE_DEDUP (1 << 0) +#define REMAP_FILE_CAN_SHORTEN (1 << 1) + loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, loff_t, loff_t, unsigned); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index e3edca4d265b..cc0a4b0f0e5b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -892,10 +892,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bool have_extent = false; int ret = 0; - ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); - if (ret) - return ret; - if (start + len < start) return -EINVAL; @@ -999,6 +995,15 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) return bch2_readdir(c, inode->v.i_ino, ctx); } +static int bch2_clone_file_range(struct file *file_src, loff_t pos_src, + struct file *file_dst, loff_t pos_dst, + u64 len) +{ + return bch2_remap_file_range(file_src, pos_src, + file_dst, pos_dst, + len, 0); +} + static const struct file_operations bch_file_operations = { .llseek = bch2_llseek, .read_iter = bch2_read_iter, @@ -1016,7 +1021,7 @@ static const struct file_operations bch_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = bch2_compat_fs_ioctl, #endif - .remap_file_range = bch2_remap_file_range, + .clone_file_range = bch2_clone_file_range, }; static const struct inode_operations bch_file_inode_operations = { @@ -1086,7 +1091,7 @@ static const struct address_space_operations bch_address_space_operations = { .writepage = bch2_writepage, .readpage = bch2_readpage, .writepages = bch2_writepages, - .readahead = bch2_readahead, + .readpages = bch2_readpages, .set_page_dirty = __set_page_dirty_nobuffers, .write_begin = bch2_write_begin, .write_end = bch2_write_end, @@ -1572,7 +1577,9 @@ got_sb: if (ret) goto err_put_super; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; + sb->s_bdi->congested_fn = bch2_congested; + sb->s_bdi->congested_data = c; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; for_each_online_member(ca, c, i) { struct block_device *bdev = ca->disk_sb.bdev; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 15b58a33c8ff..fda2429602b1 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -135,10 +135,10 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) { - struct bvec_iter_all iter; struct bio_vec *bv; + unsigned i; - bio_for_each_segment_all(bv, bio, iter) + bio_for_each_segment_all(bv, bio, i) if (bv->bv_page != ZERO_PAGE(0)) mempool_free(bv->bv_page, &c->bio_bounce_pages); bio->bi_vcnt = 0; diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 1db1f190a168..34ef66994b73 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -288,7 +288,7 @@ static inline void bch2_journal_res_put(struct journal *j, if (!res->ref) return; - lock_release(&j->res_map, _THIS_IP_); + lock_release(&j->res_map, 0, _THIS_IP_); while (res->u64s) bch2_journal_add_entry(j, res, diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index b77d4e7f42d6..61bcd77190a7 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -11,6 +11,7 @@ #include <linux/kthread.h> #include <linux/sched/mm.h> +#include <linux/sched/task.h> #include <trace/events/bcachefs.h> /* Free space calculations: */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 6633d21f604a..fbeaa3b67326 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -326,12 +326,12 @@ static void move_free(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, cl); struct moving_context *ctxt = io->write.ctxt; - struct bvec_iter_all iter; struct bio_vec *bv; + unsigned i; bch2_disk_reservation_put(io->write.op.c, &io->write.op.res); - bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter) + bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i) if (bv->bv_page) __free_page(bv->bv_page); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 651fbc5d52b1..2d5a9b1c6a9d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -148,6 +148,44 @@ struct bch_fs *bch2_uuid_to_fs(uuid_le uuid) return c; } +int bch2_congested(void *data, int bdi_bits) +{ + struct bch_fs *c = data; + struct backing_dev_info *bdi; + struct bch_dev *ca; + unsigned i; + int ret = 0; + + rcu_read_lock(); + if (bdi_bits & (1 << WB_sync_congested)) { + /* Reads - check all devices: */ + for_each_readable_member(ca, c, i) { + bdi = ca->disk_sb.bdev->bd_bdi; + + if (bdi_congested(bdi, bdi_bits)) { + ret = 1; + break; + } + } + } else { + const struct bch_devs_mask *devs = + bch2_target_to_mask(c, c->opts.foreground_target) ?: + &c->rw_devs[BCH_DATA_user]; + + for_each_member_device_rcu(ca, c, i, devs) { + bdi = ca->disk_sb.bdev->bd_bdi; + + if (bdi_congested(bdi, bdi_bits)) { + ret = 1; + break; + } + } + } + rcu_read_unlock(); + + return ret; +} + /* Filesystem RO/RW: */ /* diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 02c81f3555c3..048ffec622af 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -199,6 +199,7 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) struct bch_fs *bch2_bdev_to_fs(struct block_device *); struct bch_fs *bch2_uuid_to_fs(uuid_le); +int bch2_congested(void *, int); bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, enum bch_member_state, int); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 6e5335440b4b..e8a7df61ff5c 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -88,7 +88,7 @@ static inline void *vpmalloc(size_t size, gfp_t gfp_mask) { return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, get_order(size)) ?: - __vmalloc(size, gfp_mask); + __vmalloc(size, gfp_mask, PAGE_KERNEL); } static inline void kvpfree(void *p, size_t size) @@ -653,6 +653,35 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes) memset(s + bytes, c, rem); } +static inline struct bio_vec next_contig_bvec(struct bio *bio, + struct bvec_iter *iter) +{ + struct bio_vec bv = bio_iter_iovec(bio, *iter); + + bio_advance_iter(bio, iter, bv.bv_len); +#ifndef CONFIG_HIGHMEM + while (iter->bi_size) { + struct bio_vec next = bio_iter_iovec(bio, *iter); + + if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len != + page_address(next.bv_page) + next.bv_offset) + break; + + bv.bv_len += next.bv_len; + bio_advance_iter(bio, iter, next.bv_len); + } +#endif + return bv; +} + +#define __bio_for_each_contig_segment(bv, bio, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bv = next_contig_bvec((bio), &(iter))), 1);) + +#define bio_for_each_contig_segment(bv, bio, iter) \ + __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter) + void sort_cmp_size(void *base, size_t num, size_t size, int (*cmp_func)(const void *, const void *, size_t), void (*swap_func)(void *, void *, size_t)); |