 fs/inode.c                |  1 +
 include/linux/fs.h        | 23 ++++++
 include/linux/init_task.h |  1 +
 include/linux/sched.h     |  4 ++
 mm/filemap.c              | 91 ++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 115 insertions(+), 5 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 4ccbc21b30ce..6b0c8d7f5901 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -346,6 +346,7 @@ void address_space_init_once(struct address_space *mapping)
 {
 	memset(mapping, 0, sizeof(*mapping));
 	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+	pagecache_lock_init(&mapping->add_lock);
 	spin_lock_init(&mapping->tree_lock);
 	init_rwsem(&mapping->i_mmap_rwsem);
 	INIT_LIST_HEAD(&mapping->private_list);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd288148a6b1..e54b73783412 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -428,9 +428,32 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned copied,
 				struct page *page, void *fsdata);
 
+/*
+ * Two-state lock - can be taken for add or block - both states are shared,
+ * like read side of rwsem, but conflict with other state:
+ */
+struct pagecache_lock {
+	atomic_long_t		v;
+	wait_queue_head_t	wait;
+};
+
+static inline void pagecache_lock_init(struct pagecache_lock *lock)
+{
+	atomic_long_set(&lock->v, 0);
+	init_waitqueue_head(&lock->wait);
+}
+
+void pagecache_add_put(struct pagecache_lock *);
+void pagecache_add_get(struct pagecache_lock *);
+void __pagecache_block_put(struct pagecache_lock *);
+void __pagecache_block_get(struct pagecache_lock *);
+void pagecache_block_put(struct pagecache_lock *);
+void pagecache_block_get(struct pagecache_lock *);
+
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
+	struct pagecache_lock	add_lock;	/* protects adding new pages */
 	spinlock_t		tree_lock;	/* and lock protecting it */
 	atomic_t		i_mmap_writable;/* count VM_SHARED mappings */
 	struct rb_root		i_mmap;		/* tree of private and shared mappings */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f8834f820ec2..1f59b57b464e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -235,6 +235,7 @@ extern struct task_group root_task_group;
 	.signal		= {{0}}},					\
 	.blocked	= {{0}},					\
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),	\
+	.pagecache_lock = NULL,						\
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 96488970b8dc..b331276f9d2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -134,6 +134,7 @@ struct perf_event_context;
 struct blk_plug;
 struct filename;
 struct nameidata;
+struct pagecache_lock;
 
 #define VMACACHE_BITS 2
 #define VMACACHE_SIZE (1U << VMACACHE_BITS)
@@ -1724,6 +1725,9 @@ struct task_struct {
 	unsigned int	in_ubsan;
 #endif
 
+	/* currently held lock, for avoiding recursing in fault path: */
+	struct pagecache_lock *pagecache_lock;
+
 	/* journalling filesystem info */
 	void *journal_info;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 20f3b1f33f0e..d6e1b48aee03 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -110,6 +110,73 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
+static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
+{
+	BUG_ON(atomic_long_read(&lock->v) == 0);
+
+	if (atomic_long_sub_return_release(i, &lock->v) == 0)
+		wake_up_all(&lock->wait);
+}
+
+static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
+{
+	long v = atomic_long_read(&lock->v), old;
+
+	do {
+		old = v;
+
+		if (i > 0 ? v < 0 : v > 0)
+			return false;
+	} while ((v = atomic_long_cmpxchg_acquire(&lock->v,
+					old, old + i)) != old);
+	return true;
+}
+
+static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
+{
+	wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
+}
+
+void pagecache_add_put(struct pagecache_lock *lock)
+{
+	__pagecache_lock_put(lock, 1);
+}
+EXPORT_SYMBOL(pagecache_add_put);
+
+void pagecache_add_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, 1);
+}
+EXPORT_SYMBOL(pagecache_add_get);
+
+void __pagecache_block_put(struct pagecache_lock *lock)
+{
+	__pagecache_lock_put(lock, -1);
+}
+EXPORT_SYMBOL(__pagecache_block_put);
+
+void __pagecache_block_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, -1);
+}
+EXPORT_SYMBOL(__pagecache_block_get);
+
+void pagecache_block_put(struct pagecache_lock *lock)
+{
+	BUG_ON(current->pagecache_lock != lock);
+	current->pagecache_lock = NULL;
+	__pagecache_lock_put(lock, -1);
+}
+EXPORT_SYMBOL(pagecache_block_put);
+
+void pagecache_block_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, -1);
+	BUG_ON(current->pagecache_lock);
+	current->pagecache_lock = lock;
+}
+EXPORT_SYMBOL(pagecache_block_get);
+
 static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
@@ -632,18 +699,21 @@ static int __add_to_page_cache_locked(struct page *page,
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 
+	if (current->pagecache_lock != &mapping->add_lock)
+		pagecache_add_get(&mapping->add_lock);
+
 	if (!huge) {
 		error = mem_cgroup_try_charge(page, current->mm,
 					      gfp_mask, &memcg, false);
 		if (error)
-			return error;
+			goto err;
 	}
 
 	error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error) {
 		if (!huge)
 			mem_cgroup_cancel_charge(page, memcg, false);
-		return error;
+		goto err;
 	}
 
 	get_page(page);
@@ -663,7 +733,11 @@ static int __add_to_page_cache_locked(struct page *page,
 	if (!huge)
 		mem_cgroup_commit_charge(page, memcg, false, false);
 	trace_mm_filemap_add_to_page_cache(page);
-	return 0;
+err:
+	if (current->pagecache_lock != &mapping->add_lock)
+		pagecache_add_put(&mapping->add_lock);
+
+	return error;
 err_insert:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
@@ -671,7 +745,7 @@ err_insert:
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
 	put_page(page);
-	return error;
+	goto err;
 }
 
 /**
@@ -2027,7 +2101,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
-	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
+	if (unlikely(current->pagecache_lock == &mapping->add_lock)) {
+		/*
+		 * fault from e.g. dio -> get_user_pages() - _don't_ want to do
+		 * readahead, only read in page we need:
+		 */
+		if (!page)
+			goto no_cached_page;
+	} else if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
 		/*
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
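
A usage sketch may help readers of the patch: the point of the two-state lock is that a filesystem's direct-IO path can take mapping->add_lock in "block" mode to keep new pages out of the page cache for the duration of the I/O, while ordinary page-cache adds take it in "add" mode; each mode is shared with itself and excludes the other. The sketch below is illustrative only: example_dio_write() and do_raw_dio() are hypothetical names, not part of this patch. Only pagecache_block_get()/pagecache_block_put(), current->pagecache_lock, and mapping->add_lock come from the code above.

/*
 * Illustrative sketch, not part of the patch: example_dio_write() and
 * do_raw_dio() are hypothetical. Shows the intended pairing with the
 * "add" side taken by __add_to_page_cache_locked() above.
 */
static ssize_t example_dio_write(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	ssize_t ret;

	/*
	 * "Block" mode: wait until all "add" holders drain, then exclude
	 * them, so no new pages can be added to this mapping while the
	 * direct write is in flight. pagecache_block_get() also records
	 * the lock in current->pagecache_lock, so a fault taken while
	 * copying from a user buffer mmapped from this same file
	 * (dio -> get_user_pages()) skips re-taking the lock in "add"
	 * mode rather than deadlocking; see the current->pagecache_lock
	 * checks in __add_to_page_cache_locked() and filemap_fault().
	 */
	pagecache_block_get(&mapping->add_lock);

	ret = do_raw_dio(iocb, iter);	/* hypothetical helper doing the I/O */

	pagecache_block_put(&mapping->add_lock);
	return ret;
}

Note that the recursion check is per-mapping: a fault on some other file's mapping still takes that mapping's add_lock normally, since current->pagecache_lock is compared against &mapping->add_lock, not merely tested for NULL.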