diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-06-02 12:49:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-06-02 12:49:16 -0700 |
commit | fcd0bb8e99f7f5fbe6979b8633ed86502d822203 (patch) | |
tree | 5e157a817c881c0b7cdfddabc76de24b0b34df71 | |
parent | 7f9039c524a351c684149ecf1b3c5145a0dff2fe (diff) | |
parent | 5402c4d4d2000a9baa30c1157c97152ec6383733 (diff) |
Merge tag 'vfs-6.16-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:
- Fix the AT_HANDLE_CONNECTABLE option so filesystems that don't know
how to decode a connected non-dir dentry fail the request
- Use repr(transparent) to ensure identical layout between the C and
Rust implementation of struct file
- Add a missing xas_pause() into the dax code employing
wait_entry_unlocked_exclusive()
- Fix FOP_DONTCACHE which we disabled for v6.15.
A folio could get redirtied and/or scheduled for writeback after the
initial dropbehind test. Change the test accordingly to handle these
cases so we can re-enable FOP_DONTCACHE again
* tag 'vfs-6.16-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
exportfs: require ->fh_to_parent() to encode connectable file handles
rust: file: improve safety comments
rust: file: mark `LocalFile` as `repr(transparent)`
fs/dax: Fix "don't skip locked entries when scanning entries"
iomap: don't lose folio dropbehind state for overwrites
mm/filemap: unify dropbehind flag testing and clearing
mm/filemap: unify read/write dropbehind naming
Revert "Disable FOP_DONTCACHE for now due to bugs"
mm/filemap: use filemap_end_dropbehind() for read invalidation
mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback
-rw-r--r-- | fs/dax.c | 2 | ||||
-rw-r--r-- | fs/iomap/buffered-io.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 22 | ||||
-rw-r--r-- | include/linux/exportfs.h | 10 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | include/linux/iomap.h | 5 | ||||
-rw-r--r-- | mm/filemap.c | 39 | ||||
-rw-r--r-- | rust/kernel/fs/file.rs | 10 |
8 files changed, 68 insertions, 24 deletions
@@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry) wq = dax_entry_waitqueue(xas, entry, &ewait.key); prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); - xas_pause(xas); + xas_reset(xas); xas_unlock_irq(xas); schedule(); finish_wait(wq, &ewait.wait); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 233abf598f65..3729391a18f3 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1691,6 +1691,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, ioend_flags |= IOMAP_IOEND_UNWRITTEN; if (wpc->iomap.flags & IOMAP_F_SHARED) ioend_flags |= IOMAP_IOEND_SHARED; + if (folio_test_dropbehind(folio)) + ioend_flags |= IOMAP_IOEND_DONTCACHE; if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) ioend_flags |= IOMAP_IOEND_BOUNDARY; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 26a04a783489..63151feb9c3f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -436,6 +436,25 @@ allocate_blocks: return 0; } +static bool +xfs_ioend_needs_wq_completion( + struct iomap_ioend *ioend) +{ + /* Changing inode size requires a transaction. */ + if (xfs_ioend_is_append(ioend)) + return true; + + /* Extent manipulation requires a transaction. */ + if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)) + return true; + + /* Page cache invalidation cannot be done in irq context. */ + if (ioend->io_flags & IOMAP_IOEND_DONTCACHE) + return true; + + return false; +} + static int xfs_submit_ioend( struct iomap_writepage_ctx *wpc, @@ -460,8 +479,7 @@ xfs_submit_ioend( memalloc_nofs_restore(nofs_flag); /* send ioends that might require a transaction to the completion wq */ - if (xfs_ioend_is_append(ioend) || - (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))) + if (xfs_ioend_needs_wq_completion(ioend)) ioend->io_bio.bi_end_io = xfs_end_bio; if (status) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fc93f0abf513..25c4a5afbd44 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -314,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { + if (!nop) + return false; + /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -322,6 +325,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, return exportfs_can_encode_fid(nop); /* + * If a connectable file handle was requested, we need to make sure that + * filesystem can also decode connected file handles. + */ + if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent) + return false; + + /* * If a decodeable file handle was requested, we need to make sure that * filesystem can also decode file handles. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index da86fcb11882..96c7925a6551 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2207,7 +2207,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ +#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 68416b135151..522644d62f30 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -377,13 +377,16 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, #define IOMAP_IOEND_BOUNDARY (1U << 2) /* is direct I/O */ #define IOMAP_IOEND_DIRECT (1U << 3) +/* is DONTCACHE I/O */ +#define IOMAP_IOEND_DONTCACHE (1U << 4) /* * Flags that if set on either ioend prevent the merge of two ioends. * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) */ #define IOMAP_IOEND_NOMERGE_FLAGS \ - (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT) + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \ + IOMAP_IOEND_DONTCACHE) /* * Structure for writeback I/O completions. diff --git a/mm/filemap.c b/mm/filemap.c index 09d005848f0d..48c944e2c163 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1589,13 +1589,30 @@ int folio_wait_private_2_killable(struct folio *folio) } EXPORT_SYMBOL(folio_wait_private_2_killable); +static void filemap_end_dropbehind(struct folio *folio) +{ + struct address_space *mapping = folio->mapping; + + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + + if (folio_test_writeback(folio) || folio_test_dirty(folio)) + return; + if (!folio_test_clear_dropbehind(folio)) + return; + if (mapping) + folio_unmap_invalidate(mapping, folio, 0); +} + /* * If folio was marked as dropbehind, then pages should be dropped when writeback * completes. Do that now. If we fail, it's likely because of a big folio - * just reset dropbehind for that case and latter completions should invalidate. */ -static void folio_end_dropbehind_write(struct folio *folio) +static void filemap_end_dropbehind_write(struct folio *folio) { + if (!folio_test_dropbehind(folio)) + return; + /* * Hitting !in_task() should not happen off RWF_DONTCACHE writeback, * but can happen if normal writeback just happens to find dirty folios @@ -1604,8 +1621,7 @@ static void folio_end_dropbehind_write(struct folio *folio) * invalidation in that case. */ if (in_task() && folio_trylock(folio)) { - if (folio->mapping) - folio_unmap_invalidate(folio->mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } @@ -1620,8 +1636,6 @@ static void folio_end_dropbehind_write(struct folio *folio) */ void folio_end_writeback(struct folio *folio) { - bool folio_dropbehind = false; - VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio); /* @@ -1643,14 +1657,11 @@ void folio_end_writeback(struct folio *folio) * reused before the folio_wake_bit(). */ folio_get(folio); - if (!folio_test_dirty(folio)) - folio_dropbehind = folio_test_clear_dropbehind(folio); if (__folio_end_writeback(folio)) folio_wake_bit(folio, PG_writeback); - acct_reclaim_writeback(folio); - if (folio_dropbehind) - folio_end_dropbehind_write(folio); + filemap_end_dropbehind_write(folio); + acct_reclaim_writeback(folio); folio_put(folio); } EXPORT_SYMBOL(folio_end_writeback); @@ -2635,16 +2646,14 @@ static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio) return (pos1 >> shift == pos2 >> shift); } -static void filemap_end_dropbehind_read(struct address_space *mapping, - struct folio *folio) +static void filemap_end_dropbehind_read(struct folio *folio) { if (!folio_test_dropbehind(folio)) return; if (folio_test_writeback(folio) || folio_test_dirty(folio)) return; if (folio_trylock(folio)) { - if (folio_test_clear_dropbehind(folio)) - folio_unmap_invalidate(mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } @@ -2765,7 +2774,7 @@ put_folios: for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; - filemap_end_dropbehind_read(mapping, folio); + filemap_end_dropbehind_read(folio); folio_put(folio); } folio_batch_init(&fbatch); diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index 13a0e44cd1aa..72d84fb0e266 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -219,12 +219,13 @@ unsafe impl AlwaysRefCounted for File { /// must be on the same thread as this file. /// /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos +#[repr(transparent)] pub struct LocalFile { inner: Opaque<bindings::file>, } // SAFETY: The type invariants guarantee that `LocalFile` is always ref-counted. This implementation -// makes `ARef<File>` own a normal refcount. +// makes `ARef<LocalFile>` own a normal refcount. unsafe impl AlwaysRefCounted for LocalFile { #[inline] fn inc_ref(&self) { @@ -235,7 +236,8 @@ unsafe impl AlwaysRefCounted for LocalFile { #[inline] unsafe fn dec_ref(obj: ptr::NonNull<LocalFile>) { // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we - // may drop it. The cast is okay since `File` has the same representation as `struct file`. + // may drop it. The cast is okay since `LocalFile` has the same representation as + // `struct file`. unsafe { bindings::fput(obj.cast().as_ptr()) } } } @@ -273,7 +275,7 @@ impl LocalFile { #[inline] pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a LocalFile { // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the - // duration of 'a. The cast is okay because `File` is `repr(transparent)`. + // duration of `'a`. The cast is okay because `LocalFile` is `repr(transparent)`. // // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls. unsafe { &*ptr.cast() } @@ -347,7 +349,7 @@ impl File { #[inline] pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a File { // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the - // duration of 'a. The cast is okay because `File` is `repr(transparent)`. + // duration of `'a`. The cast is okay because `File` is `repr(transparent)`. // // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls. unsafe { &*ptr.cast() } |