diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-11-04 14:11:53 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2019-11-04 18:00:43 -0500 |
commit | 44daa79fafb31235f39f1102a7f724b40e5376c0 (patch) | |
tree | b192392da7ddd18d27a0ad3299dac67d4596ab9a | |
parent | afffa3dc1e51a0884552ca70d09cc7a86b388e16 (diff) |
bcachefs: Move pagecache add lock to bcachefs code
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/fs-io.c | 90 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 48 | ||||
-rw-r--r-- | fs/bcachefs/fs.h | 37 | ||||
-rw-r--r-- | fs/inode.c | 1 | ||||
-rw-r--r-- | include/linux/fs.h | 24 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | init/init_task.c | 1 | ||||
-rw-r--r-- | mm/filemap.c | 73 |
8 files changed, 116 insertions, 161 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 101661d0ea9f..4da04ba7cbd9 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -510,12 +510,12 @@ static void bch2_set_page_dirty(struct bch_fs *c, vm_fault_t bch2_page_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; - struct address_space *mapping = file->f_mapping; + struct bch_inode_info *inode = file_bch_inode(file); int ret; - pagecache_add_get(&mapping->add_lock); + bch2_pagecache_add_get(&inode->ei_pagecache_lock); ret = filemap_fault(vmf); - pagecache_add_put(&mapping->add_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); return ret; } @@ -543,7 +543,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) * a write_invalidate_inode_pages_range() that works without dropping * page lock before invalidating page */ - pagecache_add_get(&mapping->add_lock); + bch2_pagecache_add_get(&inode->ei_pagecache_lock); lock_page(page); isize = i_size_read(&inode->v); @@ -567,7 +567,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) wait_for_stable_page(page); out: - pagecache_add_put(&mapping->add_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); sb_end_pagefault(inode->v.i_sb); return ret; @@ -899,7 +899,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping, iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_SLOTS); - pagecache_add_get(&mapping->add_lock); + bch2_pagecache_add_get(&inode->ei_pagecache_lock); while ((page = readpage_iter_next(&readpages_iter))) { pgoff_t index = readpages_iter.offset + readpages_iter.idx; @@ -922,7 +922,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping, &readpages_iter); } - pagecache_add_put(&mapping->add_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); bch2_trans_exit(&trans); kfree(readpages_iter.pages); @@ -1303,8 +1303,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, bch2_page_reservation_init(c, inode, res); *fsdata = res; - /* Not strictly necessary - same reason as mkwrite(): */ - pagecache_add_get(&mapping->add_lock); + bch2_pagecache_add_get(&inode->ei_pagecache_lock); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) @@ -1356,7 +1355,7 @@ err: put_page(page); *pagep = NULL; err_unlock: - pagecache_add_put(&mapping->add_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); kfree(res); *fsdata = NULL; return ret; @@ -1400,7 +1399,7 @@ int bch2_write_end(struct file *file, struct address_space *mapping, unlock_page(page); put_page(page); - pagecache_add_put(&mapping->add_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); bch2_page_reservation_put(c, inode, res); kfree(res); @@ -1558,7 +1557,7 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) ssize_t written = 0; int ret = 0; - pagecache_add_get(&mapping->add_lock); + bch2_pagecache_add_get(&inode->ei_pagecache_lock); do { unsigned offset = pos & (PAGE_SIZE - 1); @@ -1615,7 +1614,7 @@ again: balance_dirty_pages_ratelimited(mapping); } while (iov_iter_count(iter)); - pagecache_add_put(&mapping->add_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); return written ? written : ret; } @@ -1742,6 +1741,7 @@ start: ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); struct address_space *mapping = file->f_mapping; size_t count = iov_iter_count(iter); ssize_t ret; @@ -1767,9 +1767,9 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret >= 0) iocb->ki_pos += ret; } else { - pagecache_add_get(&mapping->add_lock); + bch2_pagecache_add_get(&inode->ei_pagecache_lock); ret = generic_file_read_iter(iocb, iter); - pagecache_add_put(&mapping->add_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); } return ret; @@ -1789,24 +1789,13 @@ static long bch2_dio_write_loop(struct dio_write *dio) struct bio_vec *bv; unsigned unaligned; u64 new_i_size; - loff_t offset; bool sync; long ret; if (dio->loop) goto loop; - /* Write and invalidate pagecache range that we're writing to: */ - offset = req->ki_pos + (dio->op.written << 9); - ret = write_invalidate_inode_pages_range(mapping, - offset, - offset + iov_iter_count(&dio->iter) - 1); - if (unlikely(ret)) - goto err; - while (1) { - offset = req->ki_pos + (dio->op.written << 9); - if (kthread) use_mm(dio->mm); BUG_ON(current->faults_disabled_mapping); @@ -1836,14 +1825,8 @@ static long bch2_dio_write_loop(struct dio_write *dio) goto err; } - /* gup might have faulted pages back in: */ - ret = write_invalidate_inode_pages_range(mapping, - offset, - offset + bio->bi_iter.bi_size - 1); - if (unlikely(ret)) - goto err; - - dio->op.pos = POS(inode->v.i_ino, offset >> 9); + dio->op.pos = POS(inode->v.i_ino, + (req->ki_pos >> 9) + dio->op.written); task_io_account_write(bio->bi_iter.bi_size); @@ -1895,7 +1878,7 @@ loop: ret = dio->op.error ?: ((long) dio->op.written << 9); err: - __pagecache_block_put(&mapping->add_lock); + bch2_pagecache_block_put(&inode->ei_pagecache_lock); bch2_disk_reservation_put(c, &dio->op.res); bch2_quota_reservation_put(c, inode, &dio->quota_res); @@ -1961,7 +1944,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) goto err; inode_dio_begin(&inode->v); - __pagecache_block_get(&mapping->add_lock); + bch2_pagecache_block_get(&inode->ei_pagecache_lock); extending = req->ki_pos + iter->count > inode->v.i_size; if (!extending) { @@ -2009,6 +1992,12 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) dio->op.opts.data_replicas)) goto err_put_bio; + ret = write_invalidate_inode_pages_range(mapping, + req->ki_pos, + req->ki_pos + iter->count - 1); + if (unlikely(ret)) + goto err_put_bio; + ret = bch2_dio_write_loop(dio); err: if (locked) @@ -2017,7 +2006,7 @@ err: req->ki_pos += ret; return ret; err_put_bio: - __pagecache_block_put(&mapping->add_lock); + bch2_pagecache_block_put(&inode->ei_pagecache_lock); bch2_disk_reservation_put(c, &dio->op.res); bch2_quota_reservation_put(c, inode, &dio->quota_res); bio_put(bio); @@ -2266,7 +2255,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) int ret = 0; inode_dio_wait(&inode->v); - pagecache_block_get(&mapping->add_lock); + bch2_pagecache_block_get(&inode->ei_pagecache_lock); /* * fetch current on disk i_size: inode is locked, i_size can only @@ -2337,7 +2326,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) ATTR_MTIME|ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); err: - pagecache_block_put(&mapping->add_lock); + bch2_pagecache_block_put(&inode->ei_pagecache_lock); return ret; } @@ -2346,14 +2335,13 @@ err: static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct address_space *mapping = inode->v.i_mapping; u64 discard_start = round_up(offset, block_bytes(c)) >> 9; u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9; int ret = 0; inode_lock(&inode->v); inode_dio_wait(&inode->v); - pagecache_block_get(&mapping->add_lock); + bch2_pagecache_block_get(&inode->ei_pagecache_lock); ret = __bch2_truncate_page(inode, offset >> PAGE_SHIFT, @@ -2382,7 +2370,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len i_sectors_acct(c, inode, NULL, i_sectors_delta); } err: - pagecache_block_put(&mapping->add_lock); + bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; @@ -2413,7 +2401,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, */ inode_lock(&inode->v); inode_dio_wait(&inode->v); - pagecache_block_get(&mapping->add_lock); + bch2_pagecache_block_get(&inode->ei_pagecache_lock); if (insert) { ret = -EFBIG; @@ -2600,7 +2588,7 @@ bkey_err: } err: bch2_trans_exit(&trans); - pagecache_block_put(&mapping->add_lock); + bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; } @@ -2624,7 +2612,7 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode, inode_lock(&inode->v); inode_dio_wait(&inode->v); - pagecache_block_get(&mapping->add_lock); + bch2_pagecache_block_get(&inode->ei_pagecache_lock); if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { ret = inode_newsize_ok(&inode->v, end); @@ -2767,7 +2755,7 @@ bkey_err: } err: bch2_trans_exit(&trans); - pagecache_block_put(&mapping->add_lock); + bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; } @@ -2860,16 +2848,13 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, abs(pos_src - pos_dst) < len) return -EINVAL; - bch2_lock_inodes(INODE_LOCK, src, dst); + bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); file_update_time(file_dst); inode_dio_wait(&src->v); inode_dio_wait(&dst->v); - __pagecache_block_get(&src->v.i_mapping->add_lock); - __pagecache_block_get(&dst->v.i_mapping->add_lock); - ret = generic_remap_file_range_prep(file_src, pos_src, file_dst, pos_dst, &len, remap_flags); @@ -2908,10 +2893,7 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, i_size_write(&dst->v, pos_dst + len); spin_unlock(&dst->v.i_lock); err: - __pagecache_block_put(&dst->v.i_mapping->add_lock); - __pagecache_block_put(&src->v.i_mapping->add_lock); - - bch2_unlock_inodes(INODE_LOCK, src, dst); + bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); return ret; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 671993362e97..7d3d5c2045b3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -49,6 +49,53 @@ static void journal_seq_copy(struct bch_inode_info *dst, } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); } +static void __pagecache_lock_put(struct pagecache_lock *lock, long i) +{ + BUG_ON(atomic_long_read(&lock->v) == 0); + + if (atomic_long_sub_return_release(i, &lock->v) == 0) + wake_up_all(&lock->wait); +} + +static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i) +{ + long v = atomic_long_read(&lock->v), old; + + do { + old = v; + + if (i > 0 ? v < 0 : v > 0) + return false; + } while ((v = atomic_long_cmpxchg_acquire(&lock->v, + old, old + i)) != old); + return true; +} + +static void __pagecache_lock_get(struct pagecache_lock *lock, long i) +{ + wait_event(lock->wait, __pagecache_lock_tryget(lock, i)); +} + +void bch2_pagecache_add_put(struct pagecache_lock *lock) +{ + __pagecache_lock_put(lock, 1); +} + +void bch2_pagecache_add_get(struct pagecache_lock *lock) +{ + __pagecache_lock_get(lock, 1); +} + +void bch2_pagecache_block_put(struct pagecache_lock *lock) +{ + __pagecache_lock_put(lock, -1); +} + +void bch2_pagecache_block_get(struct pagecache_lock *lock) +{ + __pagecache_lock_get(lock, -1); +} + void bch2_inode_update_after_write(struct bch_fs *c, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -1090,6 +1137,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) inode_init_once(&inode->v); mutex_init(&inode->ei_update_lock); + pagecache_lock_init(&inode->ei_pagecache_lock); mutex_init(&inode->ei_quota_lock); inode->ei_journal_seq = 0; diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 406056668516..eda903a45325 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -10,6 +10,26 @@ #include <linux/seqlock.h> #include <linux/stat.h> +/* + * Two-state lock - can be taken for add or block - both states are shared, + * like read side of rwsem, but conflict with other state: + */ +struct pagecache_lock { + atomic_long_t v; + wait_queue_head_t wait; +}; + +static inline void pagecache_lock_init(struct pagecache_lock *lock) +{ + atomic_long_set(&lock->v, 0); + init_waitqueue_head(&lock->wait); +} + +void bch2_pagecache_add_put(struct pagecache_lock *); +void bch2_pagecache_add_get(struct pagecache_lock *); +void bch2_pagecache_block_put(struct pagecache_lock *); +void bch2_pagecache_block_get(struct pagecache_lock *); + struct bch_inode_info { struct inode v; @@ -18,6 +38,8 @@ struct bch_inode_info { u64 ei_quota_reserved; unsigned long ei_last_dirtied; + struct pagecache_lock ei_pagecache_lock; + struct mutex ei_quota_lock; struct bch_qid ei_qid; @@ -37,7 +59,8 @@ static inline int ptrcmp(void *l, void *r) enum bch_inode_lock_op { INODE_LOCK = (1U << 0), - INODE_UPDATE_LOCK = (1U << 1), + INODE_PAGECACHE_BLOCK = (1U << 1), + INODE_UPDATE_LOCK = (1U << 2), }; #define bch2_lock_inodes(_locks, ...) \ @@ -49,9 +72,11 @@ do { \ \ for (i = 1; i < ARRAY_SIZE(a); i++) \ if (a[i] != a[i - 1]) { \ - if (_locks & INODE_LOCK) \ + if ((_locks) & INODE_LOCK) \ down_write_nested(&a[i]->v.i_rwsem, i); \ - if (_locks & INODE_UPDATE_LOCK) \ + if ((_locks) & INODE_PAGECACHE_BLOCK) \ + bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\ + if ((_locks) & INODE_UPDATE_LOCK) \ mutex_lock_nested(&a[i]->ei_update_lock, i);\ } \ } while (0) @@ -65,9 +90,11 @@ do { \ \ for (i = 1; i < ARRAY_SIZE(a); i++) \ if (a[i] != a[i - 1]) { \ - if (_locks & INODE_LOCK) \ + if ((_locks) & INODE_LOCK) \ up_write(&a[i]->v.i_rwsem); \ - if (_locks & INODE_UPDATE_LOCK) \ + if ((_locks) & INODE_PAGECACHE_BLOCK) \ + bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\ + if ((_locks) & INODE_UPDATE_LOCK) \ mutex_unlock(&a[i]->ei_update_lock); \ } \ } while (0) diff --git a/fs/inode.c b/fs/inode.c index ac77a7af6f23..76ffb3a86adb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -363,7 +363,6 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); - pagecache_lock_init(&mapping->add_lock); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e7b0a95260f..5a85a3878189 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -421,28 +421,6 @@ int pagecache_write_end(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); -/* - * Two-state lock - can be taken for add or block - both states are shared, - * like read side of rwsem, but conflict with other state: - */ -struct pagecache_lock { - atomic_long_t v; - wait_queue_head_t wait; -}; - -static inline void pagecache_lock_init(struct pagecache_lock *lock) -{ - atomic_long_set(&lock->v, 0); - init_waitqueue_head(&lock->wait); -} - -void pagecache_add_put(struct pagecache_lock *); -void pagecache_add_get(struct pagecache_lock *); -void __pagecache_block_put(struct pagecache_lock *); -void __pagecache_block_get(struct pagecache_lock *); -void pagecache_block_put(struct pagecache_lock *); -void pagecache_block_get(struct pagecache_lock *); - /** * struct address_space - Contents of a cacheable, mappable object. * @host: Owner, either the inode or the block_device. @@ -477,8 +455,6 @@ struct address_space { spinlock_t private_lock; struct list_head private_list; void *private_data; - struct pagecache_lock add_lock - ____cacheline_aligned_in_smp; /* protects adding new pages */ } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but diff --git a/include/linux/sched.h b/include/linux/sched.h index f3bbf171e97d..425c0d8557a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -938,9 +938,6 @@ struct task_struct { unsigned int in_ubsan; #endif - /* currently held lock, for avoiding recursing in fault path: */ - struct pagecache_lock *pagecache_lock; - /* Journalling filesystem info: */ void *journal_info; diff --git a/init/init_task.c b/init/init_task.c index 6c0f1257234b..bc6a4b9fb1e7 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -116,7 +116,6 @@ struct task_struct init_task }, .blocked = {{0}}, .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock), - .pagecache_lock = NULL, .journal_info = NULL, INIT_CPU_TIMERS(init_task) .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), diff --git a/mm/filemap.c b/mm/filemap.c index 15d8fc9a7a10..e683815217c8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -115,73 +115,6 @@ * ->tasklist_lock (memory_failure, collect_procs_ao) */ -static void __pagecache_lock_put(struct pagecache_lock *lock, long i) -{ - BUG_ON(atomic_long_read(&lock->v) == 0); - - if (atomic_long_sub_return_release(i, &lock->v) == 0) - wake_up_all(&lock->wait); -} - -static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i) -{ - long v = atomic_long_read(&lock->v), old; - - do { - old = v; - - if (i > 0 ? v < 0 : v > 0) - return false; - } while ((v = atomic_long_cmpxchg_acquire(&lock->v, - old, old + i)) != old); - return true; -} - -static void __pagecache_lock_get(struct pagecache_lock *lock, long i) -{ - wait_event(lock->wait, __pagecache_lock_tryget(lock, i)); -} - -void pagecache_add_put(struct pagecache_lock *lock) -{ - __pagecache_lock_put(lock, 1); -} -EXPORT_SYMBOL(pagecache_add_put); - -void pagecache_add_get(struct pagecache_lock *lock) -{ - __pagecache_lock_get(lock, 1); -} -EXPORT_SYMBOL(pagecache_add_get); - -void __pagecache_block_put(struct pagecache_lock *lock) -{ - __pagecache_lock_put(lock, -1); -} -EXPORT_SYMBOL(__pagecache_block_put); - -void __pagecache_block_get(struct pagecache_lock *lock) -{ - __pagecache_lock_get(lock, -1); -} -EXPORT_SYMBOL(__pagecache_block_get); - -void pagecache_block_put(struct pagecache_lock *lock) -{ - BUG_ON(current->pagecache_lock != lock); - current->pagecache_lock = NULL; - __pagecache_lock_put(lock, -1); -} -EXPORT_SYMBOL(pagecache_block_put); - -void pagecache_block_get(struct pagecache_lock *lock) -{ - __pagecache_lock_get(lock, -1); - BUG_ON(current->pagecache_lock); - current->pagecache_lock = lock; -} -EXPORT_SYMBOL(pagecache_block_get); - static int page_cache_tree_insert_vec(struct page *pages[], unsigned nr_pages, struct address_space *mapping, @@ -977,9 +910,6 @@ static int add_to_page_cache_vec(struct page **pages, unsigned nr_pages, page->index = index + i; } - if (current->pagecache_lock != &mapping->add_lock) - pagecache_add_get(&mapping->add_lock); - error = page_cache_tree_insert_vec(pages, nr_pages, mapping, index, gfp_mask, shadow); if (error > 0) { @@ -987,9 +917,6 @@ static int add_to_page_cache_vec(struct page **pages, unsigned nr_pages, error = 0; } - if (current->pagecache_lock != &mapping->add_lock) - pagecache_add_put(&mapping->add_lock); - for (i = 0; i < nr_added; i++) { struct page *page = pages[i]; |