diff options
-rw-r--r-- | Documentation/admin-guide/cgroup-v2.rst | 2 | ||||
-rw-r--r-- | fs/aio.c | 1 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 7 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 2 | ||||
-rw-r--r-- | fs/buffer.c | 4 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 2 | ||||
-rw-r--r-- | fs/f2fs/data.c | 9 | ||||
-rw-r--r-- | fs/freevxfs/vxfs_dir.h | 2 | ||||
-rw-r--r-- | fs/fs-writeback.c | 8 | ||||
-rw-r--r-- | fs/hfsplus/hfsplus_fs.h | 3 | ||||
-rw-r--r-- | fs/hfsplus/wrapper.c | 2 | ||||
-rw-r--r-- | fs/iomap/buffered-io.c | 2 | ||||
-rw-r--r-- | fs/mpage.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 41 | ||||
-rw-r--r-- | fs/namespace.c | 68 | ||||
-rw-r--r-- | include/linux/writeback.h | 4 | ||||
-rw-r--r-- | include/uapi/linux/mount.h | 6 | ||||
-rw-r--r-- | init/initramfs.c | 15 |
18 files changed, 149 insertions, 31 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 69af2173555f..064012ea6f36 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2945,7 +2945,7 @@ following two functions. a queue (device) has been associated with the bio and before submission. - wbc_account_cgroup_owner(@wbc, @page, @bytes) + wbc_account_cgroup_owner(@wbc, @folio, @bytes) Should be called for each data segment being written out. While this function doesn't care exactly when it's called during the writeback session, it's the easiest and most @@ -2191,7 +2191,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, return -EINVAL; spin_lock_irq(&ctx->ctx_lock); - /* TODO: use a hash or array, this sucks. */ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { if (kiocb->ki_res.obj == obj) { ret = kiocb->ki_cancel(&kiocb->rw); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 39c9677c47d5..4667d1e034e0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -785,7 +785,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl, } if (bio_ctrl->wbc) - wbc_account_cgroup_owner(bio_ctrl->wbc, &folio->page, + wbc_account_cgroup_owner(bio_ctrl->wbc, folio, len); size -= len; @@ -1707,7 +1707,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, ret = bio_add_folio(&bbio->bio, folio, eb->len, eb->start - folio_pos(folio)); ASSERT(ret); - wbc_account_cgroup_owner(wbc, folio_page(folio, 0), eb->len); + wbc_account_cgroup_owner(wbc, folio, eb->len); folio_unlock(folio); } else { int num_folios = num_extent_folios(eb); @@ -1721,8 +1721,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, folio_start_writeback(folio); ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0); ASSERT(ret); - wbc_account_cgroup_owner(wbc, folio_page(folio, 0), - eb->folio_size); + wbc_account_cgroup_owner(wbc, folio, eb->folio_size); wbc->nr_to_write -= folio_nr_pages(folio); folio_unlock(folio); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index edac499fd83d..eb64f04755c2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1729,7 +1729,7 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode, * need full accuracy. Just account the whole thing * against the first page. */ - wbc_account_cgroup_owner(wbc, &locked_folio->page, + wbc_account_cgroup_owner(wbc, locked_folio, cur_end - start); async_chunk[i].locked_folio = locked_folio; locked_folio = NULL; diff --git a/fs/buffer.c b/fs/buffer.c index 1fc9a50def0b..32bd0f4c4223 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2803,7 +2803,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_write_hint = write_hint; - __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); + bio_add_folio_nofail(bio, bh->b_folio, bh->b_size, bh_offset(bh)); bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; @@ -2813,7 +2813,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, if (wbc) { wbc_init_bio(wbc, bio); - wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size); + wbc_account_cgroup_owner(wbc, bh->b_folio, bh->b_size); } submit_bio(bio); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index ad5543866d21..b7b9261fec3b 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -421,7 +421,7 @@ submit_and_retry: io_submit_init_bio(io, bh); if (!bio_add_folio(io->io_bio, io_folio, bh->b_size, bh_offset(bh))) goto submit_and_retry; - wbc_account_cgroup_owner(io->io_wbc, &folio->page, bh->b_size); + wbc_account_cgroup_owner(io->io_wbc, folio, bh->b_size); io->io_next_block++; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 94f7b084f601..e3ce763cce18 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -711,7 +711,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), + PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); @@ -911,7 +912,8 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), + PAGE_SIZE); inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); @@ -1011,7 +1013,8 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), + PAGE_SIZE); io->last_block_in_bio = fio->new_blkaddr; diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h index fbcd603365ad..8c67627f2a3d 100644 --- a/fs/freevxfs/vxfs_dir.h +++ b/fs/freevxfs/vxfs_dir.h @@ -25,7 +25,7 @@ struct vxfs_dirblk { __fs16 d_free; /* free space in dirblock */ __fs16 d_nhash; /* no of hash chains */ - __fs16 d_hash[1]; /* hash chain */ + __fs16 d_hash[]; /* hash chain */ }; /* diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 77db1f10023e..3cd99e2dc6ac 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -907,17 +907,16 @@ EXPORT_SYMBOL_GPL(wbc_detach_inode); /** * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership * @wbc: writeback_control of the writeback in progress - * @page: page being written out + * @folio: folio being written out * @bytes: number of bytes being written out * - * @bytes from @page are about to written out during the writeback + * @bytes from @folio are about to written out during the writeback * controlled by @wbc. Keep the book for foreign inode detection. See * wbc_detach_inode(). */ -void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, +void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio, size_t bytes) { - struct folio *folio; struct cgroup_subsys_state *css; int id; @@ -930,7 +929,6 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, if (!wbc->wb || wbc->no_cgroup_owner) return; - folio = page_folio(page); css = mem_cgroup_css_from_folio(folio); /* dead cgroups shouldn't contribute to inode ownership arbitration */ if (!(css->flags & CSS_ONLINE)) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 59ce81dca73f..5389918bbf29 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -156,6 +156,7 @@ struct hfsplus_sb_info { /* Runtime variables */ u32 blockoffset; + u32 min_io_size; sector_t part_start; sector_t sect_count; int fs_shift; @@ -307,7 +308,7 @@ struct hfsplus_readdir_data { */ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) { - return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev), + return max_t(unsigned short, HFSPLUS_SB(sb)->min_io_size, HFSPLUS_SECTOR_SIZE); } diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index ce9346099c72..4b0fdc49d1fe 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -172,6 +172,8 @@ int hfsplus_read_wrapper(struct super_block *sb) if (!blocksize) goto out; + sbi->min_io_size = blocksize; + if (hfsplus_get_last_session(sb, &part_start, &part_size)) goto out; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 11ea747228ae..3d1fae7d3a64 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1833,7 +1833,7 @@ new_ioend: if (ifs) atomic_add(len, &ifs->write_bytes_pending); wpc->ioend->io_size += len; - wbc_account_cgroup_owner(wbc, &folio->page, len); + wbc_account_cgroup_owner(wbc, folio, len); return 0; } diff --git a/fs/mpage.c b/fs/mpage.c index b5b5ddf9d513..82aecf372743 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -606,7 +606,7 @@ alloc_new: * the confused fail path above (OOM) will be very confused when * it finds all bh marked clean (i.e. it will not write anything) */ - wbc_account_cgroup_owner(wbc, &folio->page, folio_size(folio)); + wbc_account_cgroup_owner(wbc, folio, folio_size(folio)); length = first_unmapped << blkbits; if (!bio_add_folio(bio, folio, length, 0)) { bio = mpage_bio_submit_write(bio); diff --git a/fs/namei.c b/fs/namei.c index 4a4a22a08ac2..05a8d544fb35 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -326,6 +326,25 @@ static int check_acl(struct mnt_idmap *idmap, return -EAGAIN; } +/* + * Very quick optimistic "we know we have no ACL's" check. + * + * Note that this is purely for ACL_TYPE_ACCESS, and purely + * for the "we have cached that there are no ACLs" case. + * + * If this returns true, we know there are no ACLs. But if + * it returns false, we might still not have ACLs (it could + * be the is_uncached_acl() case). + */ +static inline bool no_acl_inode(struct inode *inode) +{ +#ifdef CONFIG_FS_POSIX_ACL + return likely(!READ_ONCE(inode->i_acl)); +#else + return true; +#endif +} + /** * acl_permission_check - perform basic UNIX permission checking * @idmap: idmap of the mount the inode was found from @@ -348,6 +367,28 @@ static int acl_permission_check(struct mnt_idmap *idmap, unsigned int mode = inode->i_mode; vfsuid_t vfsuid; + /* + * Common cheap case: everybody has the requested + * rights, and there are no ACLs to check. No need + * to do any owner/group checks in that case. + * + * - 'mask&7' is the requested permission bit set + * - multiplying by 0111 spreads them out to all of ugo + * - '& ~mode' looks for missing inode permission bits + * - the '!' is for "no missing permissions" + * + * After that, we just need to check that there are no + * ACL's on the inode - do the 'IS_POSIXACL()' check last + * because it will dereference the ->i_sb pointer and we + * want to avoid that if at all possible. + */ + if (!((mask & 7) * 0111 & ~mode)) { + if (no_acl_inode(inode)) + return 0; + if (!IS_POSIXACL(inode)) + return 0; + } + /* Are we the owner? If so, ACL's don't matter */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) { diff --git a/fs/namespace.c b/fs/namespace.c index 9a3c251d033d..d32b5afa99dc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5004,6 +5004,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq) return 0; } +static void statmount_fs_subtype(struct kstatmount *s, struct seq_file *seq) +{ + struct super_block *sb = s->mnt->mnt_sb; + + if (sb->s_subtype) + seq_puts(seq, sb->s_subtype); +} + +static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq) +{ + struct super_block *sb = s->mnt->mnt_sb; + struct mount *r = real_mount(s->mnt); + + if (sb->s_op->show_devname) { + size_t start = seq->count; + int ret; + + ret = sb->s_op->show_devname(seq, s->mnt->mnt_root); + if (ret) + return ret; + + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; + + /* Unescape the result */ + seq->buf[seq->count] = '\0'; + seq->count = start; + seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); + } else if (r->mnt_devname) { + seq_puts(seq, r->mnt_devname); + } + return 0; +} + static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns) { s->sm.mask |= STATMOUNT_MNT_NS_ID; @@ -5040,33 +5074,48 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) static int statmount_string(struct kstatmount *s, u64 flag) { - int ret; + int ret = 0; size_t kbufsize; struct seq_file *seq = &s->seq; struct statmount *sm = &s->sm; + u32 start = seq->count; switch (flag) { case STATMOUNT_FS_TYPE: - sm->fs_type = seq->count; + sm->fs_type = start; ret = statmount_fs_type(s, seq); break; case STATMOUNT_MNT_ROOT: - sm->mnt_root = seq->count; + sm->mnt_root = start; ret = statmount_mnt_root(s, seq); break; case STATMOUNT_MNT_POINT: - sm->mnt_point = seq->count; + sm->mnt_point = start; ret = statmount_mnt_point(s, seq); break; case STATMOUNT_MNT_OPTS: - sm->mnt_opts = seq->count; + sm->mnt_opts = start; ret = statmount_mnt_opts(s, seq); break; + case STATMOUNT_FS_SUBTYPE: + sm->fs_subtype = start; + statmount_fs_subtype(s, seq); + break; + case STATMOUNT_SB_SOURCE: + sm->sb_source = start; + ret = statmount_sb_source(s, seq); + break; default: WARN_ON_ONCE(true); return -EINVAL; } + /* + * If nothing was emitted, return to avoid setting the flag + * and terminating the buffer. + */ + if (seq->count == start) + return ret; if (unlikely(check_add_overflow(sizeof(*sm), seq->count, &kbufsize))) return -EOVERFLOW; if (kbufsize >= s->bufsize) @@ -5201,6 +5250,12 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, if (!err && s->mask & STATMOUNT_MNT_OPTS) err = statmount_string(s, STATMOUNT_MNT_OPTS); + if (!err && s->mask & STATMOUNT_FS_SUBTYPE) + err = statmount_string(s, STATMOUNT_FS_SUBTYPE); + + if (!err && s->mask & STATMOUNT_SB_SOURCE) + err = statmount_string(s, STATMOUNT_SB_SOURCE); + if (!err && s->mask & STATMOUNT_MNT_NS_ID) statmount_mnt_ns_id(s, ns); @@ -5222,7 +5277,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) } #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ - STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS) + STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \ + STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE) static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, struct statmount __user *buf, size_t bufsize, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index aee3e1b4c50f..d11b903c2edb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -214,7 +214,7 @@ static inline void wait_on_inode(struct inode *inode) void __inode_attach_wb(struct inode *inode, struct folio *folio); void wbc_detach_inode(struct writeback_control *wbc); -void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, +void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); @@ -300,7 +300,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) } static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, - struct page *page, size_t bytes) + struct folio *folio, size_t bytes) { } diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 225bc366ffcb..2b49e9131d77 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -173,7 +173,9 @@ struct statmount { __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ __u64 mnt_ns_id; /* ID of the mount namespace */ - __u64 __spare2[49]; + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u64 __spare2[48]; char str[]; /* Variable size part containing strings */ }; @@ -207,6 +209,8 @@ struct mnt_id_req { #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ /* * Special @mnt_id values that can be passed to listmount diff --git a/init/initramfs.c b/init/initramfs.c index bc911e466d5b..b2f7583bb1f5 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -360,6 +360,15 @@ static int __init do_name(void) { state = SkipIt; next_state = Reset; + + /* name_len > 0 && name_len <= PATH_MAX checked in do_header */ + if (collected[name_len - 1] != '\0') { + pr_err("initramfs name without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } + if (strcmp(collected, "TRAILER!!!") == 0) { free_hash(); return 0; @@ -424,6 +433,12 @@ static int __init do_copy(void) static int __init do_symlink(void) { + if (collected[name_len - 1] != '\0') { + pr_err("initramfs symlink without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); init_symlink(collected + N_ALIGN(name_len), collected); |