diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/.gitignore | 1 | ||||
-rw-r--r-- | Documentation/filesystems/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/filesystems/Locking | 32 | ||||
-rw-r--r-- | Documentation/filesystems/Makefile | 5 | ||||
-rw-r--r-- | Documentation/filesystems/autofs4-mount-control.txt | 71 | ||||
-rw-r--r-- | Documentation/filesystems/autofs4.txt | 8 | ||||
-rw-r--r-- | Documentation/filesystems/ceph.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/directory-locking | 2 | ||||
-rw-r--r-- | Documentation/filesystems/dnotify_test.c | 34 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 8 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 54 |
11 files changed, 109 insertions, 112 deletions
diff --git a/Documentation/filesystems/.gitignore b/Documentation/filesystems/.gitignore deleted file mode 100644 index 31d6e426b6d4..000000000000 --- a/Documentation/filesystems/.gitignore +++ /dev/null @@ -1 +0,0 @@ -dnotify_test diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 9922939e7d99..f66e748fc5e4 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -2,8 +2,6 @@ - this file (info on some of the filesystems supported by linux). Locking - info on locking rules as they pertain to Linux VFS. -Makefile - - Makefile for building the filsystems-part of DocBook. 9p.txt - 9p (v9fs) is an implementation of the Plan 9 remote fs protocol. adfs.txt diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index d30fb2cb5066..14cdc101d165 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -51,8 +51,6 @@ prototypes: int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); - int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); const char *(*get_link) (struct dentry *, struct inode *, void **); @@ -61,10 +59,7 @@ prototypes: int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, 
struct dentry *, @@ -83,31 +78,44 @@ symlink: yes mkdir: yes unlink: yes (both) rmdir: yes (both) (see below) -rename: yes (all) (see below) -rename2: yes (all) (see below) +rename: yes (all) (see below) readlink: no get_link: no setattr: yes permission: no (may not block if called in rcu-walk mode) get_acl: no getattr: no -setxattr: yes -getxattr: no listxattr: no -removexattr: yes fiemap: no update_time: no atomic_open: yes tmpfile: no + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. - cross-directory ->rename() and rename2() has (per-superblock) -->s_vfs_rename_sem. + cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. See Documentation/filesystems/directory-locking for more detailed discussion of the locking scheme for directory operations. +----------------------- xattr_handler operations ----------------------- +prototypes: + bool (*list)(struct dentry *dentry); + int (*get)(const struct xattr_handler *handler, struct dentry *dentry, + struct inode *inode, const char *name, void *buffer, + size_t size); + int (*set)(const struct xattr_handler *handler, struct dentry *dentry, + struct inode *inode, const char *name, const void *buffer, + size_t size, int flags); + +locking rules: + all may block + i_mutex(inode) +list: no +get: no +set: yes + --------------------------- super_operations --------------------------- prototypes: struct inode *(*alloc_inode)(struct super_block *sb); diff --git a/Documentation/filesystems/Makefile b/Documentation/filesystems/Makefile deleted file mode 100644 index 883010ce5e35..000000000000 --- a/Documentation/filesystems/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# List of programs to build -hostprogs-y := dnotify_test - -# Tell kbuild to always build the programs -always := $(hostprogs-y) diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt index aff22113a986..50a3e01a36f8 100644 --- 
a/Documentation/filesystems/autofs4-mount-control.txt +++ b/Documentation/filesystems/autofs4-mount-control.txt @@ -179,8 +179,19 @@ struct autofs_dev_ioctl { * including this struct */ __s32 ioctlfd; /* automount command fd */ - __u32 arg1; /* Command parameters */ - __u32 arg2; + union { + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; + }; char path[0]; }; @@ -192,8 +203,8 @@ optionally be used to check a specific mount corresponding to a given mount point file descriptor, and when requesting the uid and gid of the last successful mount on a directory within the autofs file system. -The fields arg1 and arg2 are used to communicate parameters and results of -calls made as described below. +The union is used to communicate parameters and results of calls made +as described below. The path field is used to pass a path where it is needed and the size field is used account for the increased structure length when translating the @@ -245,9 +256,9 @@ AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD Get the major and minor version of the autofs4 protocol version understood by loaded module. This call requires an initialized struct autofs_dev_ioctl with the ioctlfd field set to a valid autofs mount point descriptor -and sets the requested version number in structure field arg1. These -commands return 0 on success or one of the negative error codes if -validation fails. +and sets the requested version number in version field of struct args_protover +or sub_version field of struct args_protosubver. These commands return +0 on success or one of the negative error codes if validation fails. 
AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT @@ -256,9 +267,9 @@ AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT Obtain and release a file descriptor for an autofs managed mount point path. The open call requires an initialized struct autofs_dev_ioctl with the path field set and the size field adjusted appropriately as well -as the arg1 field set to the device number of the autofs mount. The -device number can be obtained from the mount options shown in -/proc/mounts. The close call requires an initialized struct +as the devid field of struct args_openmount set to the device number of +the autofs mount. The device number can be obtained from the mount options +shown in /proc/mounts. The close call requires an initialized struct autofs_dev_ioctl with the ioctlfd field set to the descriptor obtained from the open call. The release of the file descriptor can also be done with close(2) so any open descriptors will also be closed at process exit. @@ -272,10 +283,10 @@ AUTOFS_DEV_IOCTL_READY_CMD and AUTOFS_DEV_IOCTL_FAIL_CMD Return mount and expire result status from user space to the kernel. Both of these calls require an initialized struct autofs_dev_ioctl with the ioctlfd field set to the descriptor obtained from the open call and the arg1 field set to the wait queue token number, received -by user space in the foregoing mount or expire request. The arg2 field -is set to the status to be returned. For the ready call this is always -0 and for the fail call it is set to the errno of the operation. +call and the token field of struct args_ready or struct args_fail set +to the wait queue token number, received by user space in the foregoing +mount or expire request. The status field of struct args_fail is set to +the errno of the operation. It is set to 0 on success. AUTOFS_DEV_IOCTL_SETPIPEFD_CMD @@ -290,9 +301,10 @@ mount be catatonic (see next call). 
The call requires an initialized struct autofs_dev_ioctl with the ioctlfd field set to the descriptor obtained from the open call and -the arg1 field set to descriptor of the pipe. On success the call -also sets the process group id used to identify the controlling process -(eg. the owning automount(8) daemon) to the process group of the caller. +the pipefd field of struct args_setpipefd set to descriptor of the pipe. +On success the call also sets the process group id used to identify the +controlling process (eg. the owning automount(8) daemon) to the process +group of the caller. AUTOFS_DEV_IOCTL_CATATONIC_CMD @@ -323,9 +335,8 @@ mount on the given path dentry. The call requires an initialized struct autofs_dev_ioctl with the path field set to the mount point in question and the size field adjusted -appropriately as well as the arg1 field set to the device number of the -containing autofs mount. Upon return the struct field arg1 contains the -uid and arg2 the gid. +appropriately. Upon return the uid field of struct args_requester contains +the uid and gid field the gid. When reconstructing an autofs mount tree with active mounts we need to re-connect to mounts that may have used the original process uid and @@ -343,8 +354,9 @@ this ioctl is called until no further expire candidates are found. The call requires an initialized struct autofs_dev_ioctl with the ioctlfd field set to the descriptor obtained from the open call. In addition an immediate expire, independent of the mount timeout, can be -requested by setting the arg1 field to 1. If no expire candidates can -be found the ioctl returns -1 with errno set to EAGAIN. +requested by setting the how field of struct args_expire to 1. If no +expire candidates can be found the ioctl returns -1 with errno set to +EAGAIN. 
This call causes the kernel module to check the mount corresponding to the given ioctlfd for mounts that can be expired, issues an expire @@ -357,7 +369,8 @@ Checks if an autofs mount point is in use. The call requires an initialized struct autofs_dev_ioctl with the ioctlfd field set to the descriptor obtained from the open call and -it returns the result in the arg1 field, 1 for busy and 0 otherwise. +it returns the result in the may_umount field of struct args_askumount, +1 for busy and 0 otherwise. AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD @@ -369,12 +382,12 @@ The call requires an initialized struct autofs_dev_ioctl. There are two possible variations. Both use the path field set to the path of the mount point to check and the size field adjusted appropriately. One uses the ioctlfd field to identify a specific mount point to check while the other -variation uses the path and optionally arg1 set to an autofs mount type. -The call returns 1 if this is a mount point and sets arg1 to the device -number of the mount and field arg2 to the relevant super block magic -number (described below) or 0 if it isn't a mountpoint. In both cases -the the device number (as returned by new_encode_dev()) is returned -in field arg1. +variation uses the path and optionally in.type field of struct args_ismountpoint +set to an autofs mount type. The call returns 1 if this is a mount point +and sets out.devid field to the device number of the mount and out.magic +field to the relevant super block magic number (described below) or 0 if +it isn't a mountpoint. In both cases the device number (as returned +by new_encode_dev()) is returned in out.devid field. If supplied with a file descriptor we're looking for a specific mount, not necessarily at the top of the mounted stack. 
In this case the path diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt index 39d02e19fb62..8fac3fe7b8c9 100644 --- a/Documentation/filesystems/autofs4.txt +++ b/Documentation/filesystems/autofs4.txt @@ -203,9 +203,9 @@ initiated or is being considered, otherwise it returns 0. Mountpoint expiry ----------------- -The VFS has a mechansim for automatically expiring unused mounts, +The VFS has a mechanism for automatically expiring unused mounts, much as it can expire any unused dentry information from the dcache. -This is guided by the MNT_SHRINKABLE flag. This only applies to +This is guided by the MNT_SHRINKABLE flag. This only applies to mounts that were created by `d_automount()` returning a filesystem to be mounted. As autofs doesn't return such a filesystem but leaves the mounting to the automount daemon, it must involve the automount daemon @@ -298,7 +298,7 @@ remove directories and symlinks using normal filesystem operations. autofs knows whether a process requesting some operation is the daemon or not based on its process-group id number (see getpgid(1)). -When an autofs filesystem it mounted the pgid of the mounting +When an autofs filesystem is mounted the pgid of the mounting processes is recorded unless the "pgrp=" option is given, in which case that number is recorded instead. Any request arriving from a process in that process group is considered to come from the daemon. @@ -450,7 +450,7 @@ Commands are: numbers for existing filesystems can be found in `/proc/self/mountinfo`. - **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**: same as `close(ioctlfd)`. -- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the filesystem is in +- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the filesystem is in catatonic mode, this can provide the write end of a new pipe in `arg1` to re-establish communication with a daemon. 
The process group of the calling process is used to identify the diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index d6030aa33376..f5306ee40ea9 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -98,6 +98,10 @@ Mount Options size. rsize=X + Specify the maximum read size in bytes. By default there is no + maximum. + + rasize=X Specify the maximum readahead. mount_timeout=X diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking index c314badbcfc6..4e32cb961e5b 100644 --- a/Documentation/filesystems/directory-locking +++ b/Documentation/filesystems/directory-locking @@ -19,7 +19,7 @@ locks victim and calls the method. Locks are exclusive. 4) rename() that is _not_ cross-directory. Locking rules: caller locks the parent and finds source and target. In case of exchange (with -RENAME_EXCHANGE in rename2() flags argument) lock both. In any case, +RENAME_EXCHANGE in flags argument) lock both. In any case, if the target already exists, lock it. If the source is a non-directory, lock it. If we need to lock both, lock them in inode pointer order. Then call the method. All locks are exclusive. 
diff --git a/Documentation/filesystems/dnotify_test.c b/Documentation/filesystems/dnotify_test.c deleted file mode 100644 index 8b37b4a1e18d..000000000000 --- a/Documentation/filesystems/dnotify_test.c +++ /dev/null @@ -1,34 +0,0 @@ -#define _GNU_SOURCE /* needed to get the defines */ -#include <fcntl.h> /* in glibc 2.2 this has the needed - values defined */ -#include <signal.h> -#include <stdio.h> -#include <unistd.h> - -static volatile int event_fd; - -static void handler(int sig, siginfo_t *si, void *data) -{ - event_fd = si->si_fd; -} - -int main(void) -{ - struct sigaction act; - int fd; - - act.sa_sigaction = handler; - sigemptyset(&act.sa_mask); - act.sa_flags = SA_SIGINFO; - sigaction(SIGRTMIN + 1, &act, NULL); - - fd = open(".", O_RDONLY); - fcntl(fd, F_SETSIG, SIGRTMIN + 1); - fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); - /* we will now be notified if any of the files - in "." is modified or new files are created */ - while (1) { - pause(); - printf("Got event on fd=%d\n", event_fd); - } -} diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index b1bd05ea66b2..bdd025ceb763 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -287,8 +287,8 @@ implementing on-disk size changes. Start with a copy of the old inode_setattr and vmtruncate, and the reorder the vmtruncate + foofs_vmtruncate sequence to be in order of zeroing blocks using block_truncate_page or similar helpers, size update and on finally on-disk truncation which should not fail. -inode_change_ok now includes the size checks for ATTR_SIZE and must be called -in the beginning of ->setattr unconditionally. +setattr_prepare (which used to be inode_change_ok) now includes the size checks +for ATTR_SIZE and must be called in the beginning of ->setattr unconditionally. [mandatory] @@ -592,3 +592,7 @@ in your dentry operations instead. work just as well; if it's something more complicated, use dentry->d_parent. 
Just be careful not to assume that fetching it more than once will yield the same value - in RCU mode it could change under you. +-- +[mandatory] + ->rename() has an added flags argument. Any flags not handled by the + filesystem should result in EINVAL being returned. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index cbec006e10e4..d619c8d71966 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -323,6 +323,35 @@ Whoever sets up the inode is responsible for filling in the "i_op" field. This is a pointer to a "struct inode_operations" which describes the methods that can be performed on individual inodes. +struct xattr_handlers +--------------------- + +On filesystems that support extended attributes (xattrs), the s_xattr +superblock field points to a NULL-terminated array of xattr handlers. Extended +attributes are name:value pairs. + + name: Indicates that the handler matches attributes with the specified name + (such as "system.posix_acl_access"); the prefix field must be NULL. + + prefix: Indicates that the handler matches all attributes with the specified + name prefix (such as "user."); the name field must be NULL. + + list: Determine if attributes matching this xattr handler should be listed + for a particular dentry. Used by some listxattr implementations like + generic_listxattr. + + get: Called by the VFS to get the value of a particular extended attribute. + This method is called by the getxattr(2) system call. + + set: Called by the VFS to set the value of a particular extended attribute. + When the new value is NULL, called to remove a particular extended + attribute. This method is called by the setxattr(2) and + removexattr(2) system calls. + +When none of the xattr handlers of a filesystem match the specified attribute +name or when a filesystem doesn't support extended attributes, the various +*xattr(2) system calls return -EOPNOTSUPP. 
+ The Inode Object ================ @@ -346,8 +375,6 @@ struct inode_operations { int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); - int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); const char *(*get_link) (struct dentry *, struct inode *, @@ -356,10 +383,7 @@ struct inode_operations { int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode, int *opened); @@ -416,11 +440,8 @@ otherwise noted. rename: called by the rename(2) system call to rename the object to have the parent and name given by the second inode and dentry. - rename2: this has an additional flags argument compared to rename. - If no flags are supported by the filesystem then this method - need not be implemented. If some flags are supported then the - filesystem must return -EINVAL for any unsupported or unknown - flags. Currently the following flags are implemented: + The filesystem must return -EINVAL for any unsupported or + unknown flags. Currently the following flags are implemented: (1) RENAME_NOREPLACE: this flag indicates that if the target of the rename exists the rename should fail with -EEXIST instead of replacing the target. The VFS already checks for @@ -463,19 +484,8 @@ otherwise noted. 
getattr: called by the VFS to get attributes of a file. This method is called by stat(2) and related system calls. - setxattr: called by the VFS to set an extended attribute for a file. - Extended attribute is a name:value pair associated with an - inode. This method is called by setxattr(2) system call. - - getxattr: called by the VFS to retrieve the value of an extended - attribute name. This method is called by getxattr(2) function - call. - listxattr: called by the VFS to list all extended attributes for a - given file. This method is called by listxattr(2) system call. - - removexattr: called by the VFS to remove an extended attribute from - a file. This method is called by removexattr(2) system call. + given file. This method is called by the listxattr(2) system call. update_time: called by the VFS to update a specific time or the i_version of an inode. If this is not defined the VFS will update the inode itself |