author     Kent Overstreet <kent.overstreet@gmail.com>   2022-06-05 16:44:53 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>   2022-06-05 16:44:53 -0400
commit     5d370ea0a95804da1d533cc806040f78b0a6f297
tree       ea333d8344734f33d79cbcf8d4e7049c655b93a4
parent     6fc5ba99ee0db14f86d99db929210e21b68b9ab2
filemap: for_each_folio_batched() [folio_iter_batched]
This adds a cleaner interface for iterating over folios with batched
lookup, built around a new iterator type (folio_iter_batched) and the
for_each_folio_batched() helper, and converts most users of
filemap_get_folios() to the new interface.
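For illustration (this snippet is not part of the patch; "mapping", "index",
"end" and the per-folio work are placeholders for whatever a caller already
has), a typical conversion goes from the open-coded batch loop:

	struct folio_batch fbatch;
	unsigned i;

	folio_batch_init(&fbatch);
	while (filemap_get_folios(mapping, &index, end, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			/* per-folio work */
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

to the batched iterator:

	struct folio_iter_batched iter;
	struct folio *folio;

	for_each_folio_batched(mapping, iter, index, end, folio) {
		/* per-folio work */
	}
	folio_iter_batched_exit(&iter);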
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--   fs/buffer.c             |  65
-rw-r--r--   fs/ext4/inode.c         |  88
-rw-r--r--   fs/f2fs/compress.c      |  44
-rw-r--r--   fs/hugetlbfs/inode.c    | 123
-rw-r--r--   fs/nilfs2/page.c        |  19
-rw-r--r--   include/linux/pagevec.h |  32
-rw-r--r--   mm/filemap.c            |  49
7 files changed, 223 insertions, 197 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index ce9844d7c10f..cbdfc0c79aae 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1604,53 +1604,44 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
 {
 	struct inode *bd_inode = bdev->bd_inode;
 	struct address_space *bd_mapping = bd_inode->i_mapping;
-	struct folio_batch fbatch;
+	struct folio_iter_batched iter;
+	struct folio *folio;
 	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
 	pgoff_t end;
-	int i, count;
 	struct buffer_head *bh;
 	struct buffer_head *head;
 
 	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
-	folio_batch_init(&fbatch);
-	while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
-		count = folio_batch_count(&fbatch);
-		for (i = 0; i < count; i++) {
-			struct folio *folio = fbatch.folios[i];
-			if (!folio_buffers(folio))
-				continue;
-			/*
-			 * We use folio lock instead of bd_mapping->private_lock
-			 * to pin buffers here since we can afford to sleep and
-			 * it scales better than a global spinlock lock.
-			 */
-			folio_lock(folio);
-			/* Recheck when the folio is locked which pins bhs */
-			head = folio_buffers(folio);
-			if (!head)
-				goto unlock_page;
-			bh = head;
-			do {
-				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
-					goto next;
-				if (bh->b_blocknr >= block + len)
-					break;
-				clear_buffer_dirty(bh);
-				wait_on_buffer(bh);
-				clear_buffer_req(bh);
+	for_each_folio_batched(bd_mapping, iter, index, end, folio) {
+		if (!folio_buffers(folio))
+			continue;
+		/*
+		 * We use folio lock instead of bd_mapping->private_lock
+		 * to pin buffers here since we can afford to sleep and
+		 * it scales better than a global spinlock lock.
+		 */
+		folio_lock(folio);
+		/* Recheck when the folio is locked which pins bhs */
+		head = folio_buffers(folio);
+		if (!head)
+			goto unlock_page;
+		bh = head;
+		do {
+			if (!buffer_mapped(bh) || (bh->b_blocknr < block))
+				goto next;
+			if (bh->b_blocknr >= block + len)
+				break;
+			clear_buffer_dirty(bh);
+			wait_on_buffer(bh);
+			clear_buffer_req(bh);
 next:
-			bh = bh->b_this_page;
-		} while (bh != head);
+			bh = bh->b_this_page;
+		} while (bh != head);
 unlock_page:
-			folio_unlock(folio);
-		}
-		folio_batch_release(&fbatch);
-		cond_resched();
-		/* End of range already reached? */
-		if (index > end || !index)
-			break;
+		folio_unlock(folio);
 	}
+	folio_iter_batched_exit(&iter);
 }
 EXPORT_SYMBOL(clean_bdev_aliases);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1aaea53e67b5..f73941ac5089 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1554,9 +1554,9 @@ struct mpage_da_data {
 static void mpage_release_unused_pages(struct mpage_da_data *mpd,
 				       bool invalidate)
 {
-	unsigned nr, i;
 	pgoff_t index, end;
-	struct folio_batch fbatch;
+	struct folio_iter_batched iter;
+	struct folio *folio;
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
@@ -1574,31 +1574,24 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
 		ext4_es_remove_extent(inode, start, last - start + 1);
 	}
 
-	folio_batch_init(&fbatch);
-	while (index <= end) {
-		nr = filemap_get_folios(mapping, &index, end, &fbatch);
-		if (nr == 0)
+	for_each_folio_batched(mapping, iter, index, end, folio) {
+		BUG_ON(folio->index < mpd->first_page);
+		BUG_ON(!folio_test_locked(folio));
+		BUG_ON(folio_test_writeback(folio));
+
+		if ((folio_next_index(folio) - 1 > end))
 			break;
-		for (i = 0; i < nr; i++) {
-			struct folio *folio = fbatch.folios[i];
-			if (folio->index < mpd->first_page)
-				continue;
-			if (folio->index + folio_nr_pages(folio) - 1 > end)
-				continue;
-			BUG_ON(!folio_test_locked(folio));
-			BUG_ON(folio_test_writeback(folio));
-			if (invalidate) {
-				if (folio_mapped(folio))
-					folio_clear_dirty_for_io(folio);
-				block_invalidate_folio(folio, 0,
-						folio_size(folio));
-				folio_clear_uptodate(folio);
-			}
-			folio_unlock(folio);
+		if (invalidate) {
+			if (folio_mapped(folio))
+				folio_clear_dirty_for_io(folio);
+			block_invalidate_folio(folio, 0,
+					folio_size(folio));
+			folio_clear_uptodate(folio);
 		}
-		folio_batch_release(&fbatch);
+		folio_unlock(folio);
 	}
+	folio_iter_batched_exit(&iter);
 }
 
 static void ext4_print_free_blocks(struct inode *inode)
@@ -2314,14 +2307,14 @@ out:
  */
 static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 {
-	struct folio_batch fbatch;
-	unsigned nr, i;
+	struct folio_iter_batched iter;
+	struct folio *folio;
 	struct inode *inode = mpd->inode;
 	int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
 	pgoff_t start, end;
 	ext4_lblk_t lblk;
 	ext4_fsblk_t pblock;
-	int err;
+	int err = 0;
 	bool map_bh = false;
 
 	start = mpd->map.m_lblk >> bpp_bits;
@@ -2329,37 +2322,30 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 	lblk = start << bpp_bits;
 	pblock = mpd->map.m_pblk;
 
-	folio_batch_init(&fbatch);
-	while (start <= end) {
-		nr = filemap_get_folios(inode->i_mapping, &start, end, &fbatch);
-		if (nr == 0)
-			break;
-		for (i = 0; i < nr; i++) {
-			struct page *page = &fbatch.folios[i]->page;
+	for_each_folio_batched(inode->i_mapping, iter, start, end, folio) {
+		struct page *page = &folio->page;
 
-			err = mpage_process_page(mpd, page, &lblk, &pblock,
-						 &map_bh);
-			/*
-			 * If map_bh is true, means page may require further bh
-			 * mapping, or maybe the page was submitted for IO.
-			 * So we return to call further extent mapping.
-			 */
-			if (err < 0 || map_bh)
-				goto out;
-			/* Page fully mapped - let IO run! */
-			err = mpage_submit_page(mpd, page);
-			if (err < 0)
-				goto out;
-		}
-		folio_batch_release(&fbatch);
+		err = mpage_process_page(mpd, page, &lblk, &pblock,
+					 &map_bh);
+		/*
+		 * If map_bh is true, means page may require further bh
+		 * mapping, or maybe the page was submitted for IO.
+		 * So we return to call further extent mapping.
+		 */
+		if (err < 0 || map_bh)
+			goto out;
+		/* Page fully mapped - let IO run! */
+		err = mpage_submit_page(mpd, page);
+		if (err < 0)
+			goto out;
 	}
+	/* Extent fully mapped and matches with page boundary. We are done. */
 	mpd->map.m_len = 0;
 	mpd->map.m_flags = 0;
-	return 0;
 out:
-	folio_batch_release(&fbatch);
-	return err;
+	folio_iter_batched_exit(&iter);
+	return err < 0 ? err : 0;
 }
 
 static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 009e6c519e98..45f605d7978f 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1832,42 +1832,28 @@ bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
 void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino)
 {
 	struct address_space *mapping = sbi->compress_inode->i_mapping;
-	struct folio_batch fbatch;
-	pgoff_t index = 0;
-	pgoff_t end = MAX_BLKADDR(sbi);
+	struct folio_iter_batched iter;
+	struct folio *folio;
 
 	if (!mapping->nrpages)
 		return;
 
-	folio_batch_init(&fbatch);
-
-	do {
-		unsigned int nr, i;
-
-		nr = filemap_get_folios(mapping, &index, end - 1, &fbatch);
-		if (!nr)
-			break;
-
-		for (i = 0; i < nr; i++) {
-			struct folio *folio = fbatch.folios[i];
-
-			folio_lock(folio);
-			if (folio->mapping != mapping) {
-				folio_unlock(folio);
-				continue;
-			}
-
-			if (ino != get_page_private_data(&folio->page)) {
-				folio_unlock(folio);
-				continue;
-			}
+	for_each_folio_batched(mapping, iter, 0, MAX_BLKADDR(sbi), folio) {
+		folio_lock(folio);
+		if (folio->mapping != mapping) {
+			folio_unlock(folio);
+			continue;
+		}
 
-			generic_error_remove_page(mapping, &folio->page);
+		if (ino != get_page_private_data(&folio->page)) {
 			folio_unlock(folio);
+			continue;
 		}
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (index < end);
+
+		generic_error_remove_page(mapping, &folio->page);
+		folio_unlock(folio);
+	}
+	folio_iter_batched_exit(&iter);
 }
 
 int f2fs_init_compress_inode(struct f2fs_sb_info *sbi)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 14d33f725e05..5f6a0b20f4b7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -468,79 +468,72 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 {
 	struct hstate *h = hstate_inode(inode);
 	struct address_space *mapping = &inode->i_data;
+	struct folio_iter_batched iter;
+	struct folio *folio;
 	const pgoff_t start = lstart >> huge_page_shift(h);
 	const pgoff_t end = lend >> huge_page_shift(h);
-	struct folio_batch fbatch;
-	pgoff_t next, index;
-	int i, freed = 0;
+	int freed = 0;
 	bool truncate_op = (lend == LLONG_MAX);
 
-	folio_batch_init(&fbatch);
-	next = start;
-	while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
-		for (i = 0; i < folio_batch_count(&fbatch); ++i) {
-			struct folio *folio = fbatch.folios[i];
-			u32 hash = 0;
-
-			index = folio->index;
-			if (!truncate_op) {
-				/*
-				 * Only need to hold the fault mutex in the
-				 * hole punch case. This prevents races with
-				 * page faults. Races are not possible in the
-				 * case of truncation.
-				 */
-				hash = hugetlb_fault_mutex_hash(mapping, index);
-				mutex_lock(&hugetlb_fault_mutex_table[hash]);
-			}
+	for_each_folio_batched(mapping, iter, start, end - 1, folio) {
+		u32 hash = 0;
+		pgoff_t index = folio->index;
+		if (!truncate_op) {
 			/*
-			 * If folio is mapped, it was faulted in after being
-			 * unmapped in caller. Unmap (again) now after taking
-			 * the fault mutex. The mutex will prevent faults
-			 * until we finish removing the folio.
-			 *
-			 * This race can only happen in the hole punch case.
-			 * Getting here in a truncate operation is a bug.
+			 * Only need to hold the fault mutex in the
+			 * hole punch case. This prevents races with
+			 * page faults. Races are not possible in the
+			 * case of truncation.
 			 */
-			if (unlikely(folio_mapped(folio))) {
-				BUG_ON(truncate_op);
-
-				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-				i_mmap_lock_write(mapping);
-				mutex_lock(&hugetlb_fault_mutex_table[hash]);
-				hugetlb_vmdelete_list(&mapping->i_mmap,
-					index * pages_per_huge_page(h),
-					(index + 1) * pages_per_huge_page(h),
-					ZAP_FLAG_DROP_MARKER);
-				i_mmap_unlock_write(mapping);
-			}
-
-			folio_lock(folio);
-			/*
-			 * We must free the huge page and remove from page
-			 * cache (remove_huge_page) BEFORE removing the
-			 * region/reserve map (hugetlb_unreserve_pages). In
-			 * rare out of memory conditions, removal of the
-			 * region/reserve map could fail. Correspondingly,
-			 * the subpool and global reserve usage count can need
-			 * to be adjusted.
-			 */
-			VM_BUG_ON(HPageRestoreReserve(&folio->page));
-			remove_huge_page(&folio->page);
-			freed++;
-			if (!truncate_op) {
-				if (unlikely(hugetlb_unreserve_pages(inode,
-							index, index + 1, 1)))
-					hugetlb_fix_reserve_counts(inode);
-			}
-
-			folio_unlock(folio);
-			if (!truncate_op)
-				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			hash = hugetlb_fault_mutex_hash(mapping, index);
+			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 		}
-		folio_batch_release(&fbatch);
-		cond_resched();
+
+		/*
+		 * If folio is mapped, it was faulted in after being
+		 * unmapped in caller. Unmap (again) now after taking
+		 * the fault mutex. The mutex will prevent faults
+		 * until we finish removing the folio.
+		 *
+		 * This race can only happen in the hole punch case.
+		 * Getting here in a truncate operation is a bug.
+		 */
+		if (unlikely(folio_mapped(folio))) {
+			BUG_ON(truncate_op);
+
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			i_mmap_lock_write(mapping);
+			mutex_lock(&hugetlb_fault_mutex_table[hash]);
+			hugetlb_vmdelete_list(&mapping->i_mmap,
+				index * pages_per_huge_page(h),
+				(index + 1) * pages_per_huge_page(h),
+				ZAP_FLAG_DROP_MARKER);
+			i_mmap_unlock_write(mapping);
+		}
+
+		folio_lock(folio);
+		/*
+		 * We must free the huge page and remove from page
+		 * cache (remove_huge_page) BEFORE removing the
+		 * region/reserve map (hugetlb_unreserve_pages). In
+		 * rare out of memory conditions, removal of the
+		 * region/reserve map could fail. Correspondingly,
+		 * the subpool and global reserve usage count can need
+		 * to be adjusted.
+		 */
+		VM_BUG_ON(HPageRestoreReserve(&folio->page));
+		remove_huge_page(&folio->page);
+		freed++;
+		if (!truncate_op) {
+			if (unlikely(hugetlb_unreserve_pages(inode,
+						index, index + 1, 1)))
+				hugetlb_fix_reserve_counts(inode);
+		}
+
+		folio_unlock(folio);
+		if (!truncate_op)
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 	}
 
 	if (truncate_op)
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 3267e96c256c..d7dc8eeb44d8 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -294,18 +294,10 @@ repeat:
 void nilfs_copy_back_pages(struct address_space *dmap,
 			   struct address_space *smap)
 {
-	struct folio_batch fbatch;
-	unsigned int i, n;
-	pgoff_t start = 0;
+	struct folio_iter_batched iter;
+	struct folio *folio, *dfolio;
 
-	folio_batch_init(&fbatch);
-repeat:
-	n = filemap_get_folios(smap, &start, ~0UL, &fbatch);
-	if (!n)
-		return;
-
-	for (i = 0; i < folio_batch_count(&fbatch); i++) {
-		struct folio *folio = fbatch.folios[i], *dfolio;
+	for_each_folio_batched(smap, iter, 0, ~0UL, folio) {
 		pgoff_t index = folio->index;
 
 		folio_lock(folio);
@@ -344,10 +336,7 @@ repeat:
 		}
 		folio_unlock(folio);
 	}
-	folio_batch_release(&fbatch);
-	cond_resched();
-
-	goto repeat;
+	folio_iter_batched_exit(&iter);
 }
 
 /**
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 6649154a2115..c6d2eb946812 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -137,4 +137,36 @@ static inline void folio_batch_release(struct folio_batch *fbatch)
 }
 
 void folio_batch_remove_exceptionals(struct folio_batch *fbatch);
+
+struct folio_iter_batched {
+	struct folio_batch batch;
+	unsigned batch_idx;
+	struct address_space *mapping;
+	pgoff_t pos;
+	pgoff_t end;
+};
+
+static inline struct folio_iter_batched
+folio_iter_batched_init(struct address_space *mapping, pgoff_t start, pgoff_t end)
+{
+	return (struct folio_iter_batched) {
+		.mapping = mapping,
+		.pos = start,
+		.end = end,
+	};
+}
+
+static inline void folio_iter_batched_exit(struct folio_iter_batched *iter)
+{
+	folio_batch_release(&iter->batch);
+}
+
+struct folio *folio_iter_batched_peek(struct folio_iter_batched *);
+void folio_iter_batched_advance(struct folio_iter_batched *);
+
+#define for_each_folio_batched(_mapping, _iter, _start, _end, _folio)	\
+	for (_iter = folio_iter_batched_init(_mapping, _start, _end);	\
+	     (_folio = folio_iter_batched_peek(&(_iter)));		\
+	     folio_iter_batched_advance(&(_iter)))
+
 #endif /* _LINUX_PAGEVEC_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index d631bc317131..930478daa168 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2182,6 +2182,55 @@ out:
 }
 EXPORT_SYMBOL(filemap_get_folios);
 
+static inline struct folio *__folio_iter_batched_peek(struct folio_iter_batched *iter)
+{
+	return iter->batch.folios[iter->batch_idx];
+}
+
+struct folio *folio_iter_batched_peek(struct folio_iter_batched *iter)
+{
+	struct folio *f;
+
+	if (iter->batch_idx >= iter->batch.nr) {
+		pgoff_t start = iter->pos;
+
+		folio_batch_release(&iter->batch);
+
+		cond_resched();
+
+		filemap_get_folios(iter->mapping, &start, iter->end, &iter->batch);
+		iter->batch_idx = 0;
+	}
+
+	if (!iter->batch.nr) {
+		iter->pos = iter->end + 1;
+		return NULL;
+	}
+
+	f = __folio_iter_batched_peek(iter);
+	/*
+	 * folio might span iter->pos, but iter->pos should be monotonically
+	 * increasing:
+	 */
+	iter->pos = max(iter->pos, f->index);
+	return f;
+}
+EXPORT_SYMBOL(folio_iter_batched_peek);
+
+void folio_iter_batched_advance(struct folio_iter_batched *iter)
+{
+	/* Do we have a previously returned folio? */
+	if (iter->batch_idx < iter->batch.nr) {
+		struct folio *f = __folio_iter_batched_peek(iter);
+		pgoff_t f_end = folio_next_index(f);
+
+		BUG_ON(iter->pos < f->index || iter->pos >= f_end);
+		iter->pos = f_end;
+		iter->batch_idx++;
+	}
+}
+EXPORT_SYMBOL(folio_iter_batched_advance);
+
 static inline bool folio_more_pages(struct folio *folio, pgoff_t index,
 		pgoff_t max)
 {
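A note on the converted callers (an illustrative sketch only; process_one_folio()
is a made-up placeholder for the per-folio work, not a function from this patch):
the iterator's folio_batch holds references on the folios it has returned, so a
caller that leaves the loop early, as ext4's mpage_map_and_submit_buffers() does
via goto out, still has to call folio_iter_batched_exit() to drop them:

	struct folio_iter_batched iter;
	struct folio *folio;
	int err = 0;

	for_each_folio_batched(mapping, iter, start, end, folio) {
		err = process_one_folio(folio);	/* placeholder */
		if (err)
			break;			/* early exit, batch still holds refs */
	}
	folio_iter_batched_exit(&iter);		/* drops the remaining references */
	return err;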