From 006ff7498fe89bd9dfb891101f02557d5cfcf427 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Apr 2025 01:45:05 -0400 Subject: saner calling conventions for ->d_automount() Currently the calling conventions for ->d_automount() instances have an odd wart - returned new mount to be attached is expected to have refcount 2. That kludge is intended to make sure that mark_mounts_for_expiry() called before we get around to attaching that new mount to the tree won't decide to take it out. finish_automount() drops the extra reference after it's done with attaching mount to the tree - or drops the reference twice in case of error. ->d_automount() instances have rather counterintuitive boilerplate in them. There's a much simpler approach: have mark_mounts_for_expiry() skip the mounts that are yet to be mounted. And to hell with grabbing/dropping those extra references. Makes for simpler correctness analysis, at that... Reviewed-by: Christian Brauner Reviewed-by: Jeff Layton Reviewed-by: Paulo Alcantara (Red Hat) Acked-by: David Howells Tested-by: David Howells Acked-by: Steven Rostedt (Google) Signed-off-by: Al Viro --- Documentation/filesystems/porting.rst | 7 +++++++ Documentation/filesystems/vfs.rst | 4 +--- fs/afs/mntpt.c | 1 - fs/fuse/dir.c | 3 --- fs/namespace.c | 9 +++------ fs/nfs/namespace.c | 1 - fs/smb/client/namespace.c | 1 - kernel/trace/trace.c | 2 -- 8 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 767b2927c762..749637231773 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1203,3 +1203,10 @@ should use d_drop();d_splice_alias() and return the result of the latter. If a positive dentry cannot be returned for some reason, in-kernel clients such as cachefiles, nfsd, smb/server may not perform ideally but will fail-safe. + +--- + +**mandatory** + +Calling conventions for ->d_automount() have changed; we should *not* grab +an extra reference to new mount - it should be returned with refcount 1. diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index ae79c30b6c0c..cc0a58e96770 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1411,9 +1411,7 @@ defined: If a vfsmount is returned, the caller will attempt to mount it on the mountpoint and will remove the vfsmount from its - expiration list in the case of failure. The vfsmount should be - returned with 2 refs on it to prevent automatic expiration - the - caller will clean up the additional ref. + expiration list in the case of failure. This function is only used if DCACHE_NEED_AUTOMOUNT is set on the dentry. This is set by __d_instantiate() if S_AUTOMOUNT is diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 45cee6534122..9434a5399f2b 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -189,7 +189,6 @@ struct vfsmount *afs_d_automount(struct path *path) if (IS_ERR(newmnt)) return newmnt; - mntget(newmnt); /* prevent immediate expiration */ mnt_set_expiry(newmnt, &afs_vfsmounts); queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 83ac192e7fdd..05d8584fd3b9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -319,9 +319,6 @@ static struct vfsmount *fuse_dentry_automount(struct path *path) /* Create the submount */ mnt = fc_mount(fsc); - if (!IS_ERR(mnt)) - mntget(mnt); - put_fs_context(fsc); return mnt; } diff --git a/fs/namespace.c b/fs/namespace.c index 98a5cd756e9a..018e95fe5459 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3902,10 +3902,6 @@ int finish_automount(struct vfsmount *m, const struct path *path) return PTR_ERR(m); mnt = real_mount(m); - /* The new mount record should have at least 2 refs to prevent it being - * expired before we get a chance to add it - */ - BUG_ON(mnt_get_count(mnt) < 2); if (m->mnt_sb == path->mnt->mnt_sb && m->mnt_root == dentry) { @@ -3938,7 +3934,6 @@ int finish_automount(struct vfsmount *m, const struct path *path) unlock_mount(mp); if (unlikely(err)) goto discard; - mntput(m); return 0; discard_locked: @@ -3952,7 +3947,6 @@ discard: namespace_unlock(); } mntput(m); - mntput(m); return err; } @@ -3989,11 +3983,14 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* extract from the expiration list every vfsmount that matches the * following criteria: + * - already mounted * - only referenced by its parent vfsmount * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { + if (!is_mounted(&mnt->mnt)) + continue; if (!xchg(&mnt->mnt_expiry_mark, 1) || propagate_mount_busy(mnt, 1)) continue; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 973aed9cc5fe..7f1ec9c67ff2 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -195,7 +195,6 @@ struct vfsmount *nfs_d_automount(struct path *path) if (IS_ERR(mnt)) goto out_fc; - mntget(mnt); /* prevent immediate expiration */ if (timeout <= 0) goto out_fc; diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index e3f9213131c4..778daf11f1db 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -283,7 +283,6 @@ struct vfsmount *cifs_d_automount(struct path *path) return newmnt; } - mntget(newmnt); /* prevent immediate expiration */ mnt_set_expiry(newmnt, &cifs_automount_list); schedule_delayed_work(&cifs_automount_task, cifs_mountpoint_expiry_timeout); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b8db27fb6ef..e22aacb0028a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10088,8 +10088,6 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) put_filesystem(type); if (IS_ERR(mnt)) return NULL; - mntget(mnt); - return mnt; } -- cgit v1.2.3 From 2dbf6e0df447d1542f8fd158b17a06d2e8ede15e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 May 2025 21:04:08 -0400 Subject: kill vfs_submount() The last remaining user of vfs_submount() (tracefs) is easy to convert to fs_context_for_submount(); do that and bury that thing, along with SB_SUBMOUNT Reviewed-by: Jan Kara Acked-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Signed-off-by: Al Viro --- fs/namespace.c | 15 --------------- fs/super.c | 9 +-------- include/linux/fs.h | 1 - include/linux/mount.h | 3 --- kernel/trace/trace.c | 19 ++++++++++++++++--- 5 files changed, 17 insertions(+), 30 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 018e95fe5459..0577a9fb6050 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1329,21 +1329,6 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type, } EXPORT_SYMBOL_GPL(vfs_kern_mount); -struct vfsmount * -vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, - const char *name, void *data) -{ - /* Until it is worked out how to pass the user namespace - * through from the parent mount to the submount don't support - * unprivileged mounts with submounts. - */ - if (mountpoint->d_sb->s_user_ns != &init_user_ns) - return ERR_PTR(-EPERM); - - return vfs_kern_mount(type, SB_SUBMOUNT, name, data); -} -EXPORT_SYMBOL_GPL(vfs_submount); - static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { diff --git a/fs/super.c b/fs/super.c index 97a17f9d9023..1886e4c930e0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -823,13 +823,6 @@ struct super_block *sget(struct file_system_type *type, struct super_block *old; int err; - /* We don't yet pass the user namespace of the parent - * mount through to here so always use &init_user_ns - * until that changes. - */ - if (flags & SB_SUBMOUNT) - user_ns = &init_user_ns; - retry: spin_lock(&sb_lock); if (test) { @@ -849,7 +842,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns); + s = alloc_super(type, flags, user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..515e702d98ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1240,7 +1240,6 @@ extern int send_sigurg(struct file *file); /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) -#define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) diff --git a/include/linux/mount.h b/include/linux/mount.h index dcc17ce8a959..d4eb90a367af 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -98,9 +98,6 @@ extern struct vfsmount *vfs_create_mount(struct fs_context *fc); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); -extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, - struct file_system_type *type, - const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e22aacb0028a..936a615e8c56 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -51,6 +51,7 @@ #include #include #include /* vmap_page_range() */ +#include #include /* COMMAND_LINE_SIZE */ @@ -10075,6 +10076,8 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) { struct vfsmount *mnt; struct file_system_type *type; + struct fs_context *fc; + int ret; /* * To maintain backward compatibility for tools that mount @@ -10084,10 +10087,20 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) type = get_fs_type("tracefs"); if (!type) return NULL; - mnt = vfs_submount(mntpt, type, "tracefs", NULL); + + fc = fs_context_for_submount(type, mntpt); put_filesystem(type); - if (IS_ERR(mnt)) - return NULL; + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ret = vfs_parse_fs_string(fc, "source", + "tracefs", strlen("tracefs")); + if (!ret) + mnt = fc_mount(fc); + else + mnt = ERR_PTR(ret); + + put_fs_context(fc); return mnt; } -- cgit v1.2.3