
From: Christian Brauner <brauner@kernel.org> mainline inclusion from mainline-v6.9-rc1 commit 210a03c9d51aa0e6e6f06980116e3256da8d4c48 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICPOEB CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- There's a bunch of flags that are purely based on what the file operations support while also never being conditionally set or unset. IOW, they're not subject to change for individual files. Imho, such flags don't need to live in f_mode they might as well live in the fops structs itself. And the fops struct already has that lonely mmap_supported_flags member. We might as well turn that into a generic fop_flags member and move a few flags from FMODE_* space into FOP_* space. That gets us four FMODE_* bits back and the ability for new static flags that are about file ops to not have to live in FMODE_* space but in their own FOP_* space. It's not the most beautiful thing ever but it gets the job done. Yes, there'll be an additional pointer chase but hopefully that won't matter for these flags. I suspect there's a few more we can move into there and that we can also redirect a bunch of new flag suggestions that follow this pattern into the fop_flags field instead of f_mode. Link: https://lore.kernel.org/r/20240328-gewendet-spargel-aa60a030ef74@brauner Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Jan Kara <jack@suse.cz> Reviewed-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Christian Brauner <brauner@kernel.org> Conflicts: block/bdev.c fs/f2fs/file.c include/linux/fs.h io_uring/rw.c Signed-off-by: Long Li <leo.lilong@huawei.com> --- block/fops.c | 3 ++- drivers/dax/device.c | 2 +- fs/btrfs/file.c | 4 ++-- fs/ext4/file.c | 6 +++--- fs/f2fs/file.c | 3 ++- fs/read_write.c | 2 +- fs/xfs/xfs_file.c | 8 +++++--- include/linux/fs.h | 22 ++++++++++++---------- io_uring/io_uring.c | 2 +- io_uring/rw.c | 9 +++++---- mm/mmap.c | 4 +++- 11 files changed, 37 insertions(+), 28 deletions(-) diff --git a/block/fops.c b/block/fops.c index cda9978ebf67..f307e92d7fa9 100644 --- a/block/fops.c +++ b/block/fops.c @@ -594,7 +594,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) * during an unstable branch. */ filp->f_flags |= O_LARGEFILE; - filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; + filp->f_mode |= FMODE_CAN_ODIRECT; mode = file_to_blk_mode(filp); handle = bdev_open_by_dev(inode->i_rdev, mode, @@ -852,6 +852,7 @@ const struct file_operations def_blk_fops = { .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .fop_flags = FOP_BUFFER_RASYNC, }; static __init int blkdev_init(void) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 01e89b7ac637..cfb122b3fee3 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -377,7 +377,7 @@ static const struct file_operations dax_fops = { .release = dax_release, .get_unmapped_area = dax_get_unmapped_area, .mmap = dax_mmap, - .mmap_supported_flags = MAP_SYNC, + .fop_flags = FOP_MMAP_SYNC, }; static void dev_dax_cdev_del(void *cdev) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 68092b64e29e..bd6f711b759a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3757,8 +3757,7 @@ static int btrfs_file_open(struct inode *inode, struct file *filp) { int ret; - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | - FMODE_CAN_ODIRECT; + filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; ret = fsverity_file_open(inode, filp); if (ret) @@ -3888,6 +3887,7 @@ const struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_compat_ioctl, #endif .remap_file_range = btrfs_remap_file_range, + .fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC, }; int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ca57a5efd2ec..afa60bd4ae63 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -907,8 +907,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp) return ret; } - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | - FMODE_DIO_PARALLEL_WRITE; + filp->f_mode |= FMODE_NOWAIT; return dquot_file_open(inode, filp); } @@ -960,7 +959,6 @@ const struct file_operations ext4_file_operations = { .compat_ioctl = ext4_compat_ioctl, #endif .mmap = ext4_file_mmap, - .mmap_supported_flags = MAP_SYNC, .open = ext4_file_open, .release = ext4_release_file, .fsync = ext4_sync_file, @@ -968,6 +966,8 @@ const struct file_operations ext4_file_operations = { .splice_read = ext4_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, + .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | + FOP_DIO_PARALLEL_WRITE, }; const struct inode_operations ext4_file_inode_operations = { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ae129044c52f..0defd5de6f0b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -587,7 +587,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; + filp->f_mode |= FMODE_NOWAIT; filp->f_mode |= FMODE_CAN_ODIRECT; err = dquot_file_open(inode, filp); @@ -5158,4 +5158,5 @@ const struct file_operations f2fs_file_operations = { .splice_read = f2fs_file_splice_read, .splice_write = iter_file_splice_write, .fadvise = f2fs_file_fadvise, + .fop_flags = FOP_BUFFER_RASYNC, }; diff --git a/fs/read_write.c b/fs/read_write.c index 27a7729b82f3..c8c305e16c85 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1715,7 +1715,7 @@ int generic_write_checks_count(struct kiocb *iocb, loff_t *count) if ((iocb->ki_flags & IOCB_NOWAIT) && !((iocb->ki_flags & IOCB_DIRECT) || - (file->f_mode & FMODE_BUF_WASYNC))) + (file->f_op->fop_flags & FOP_BUFFER_WASYNC))) return -EINVAL; return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e0cef2cb5c53..02dc03060fa0 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1233,8 +1233,7 @@ xfs_file_open( { if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; - file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | - FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT; + file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; return generic_file_open(inode, file); } @@ -1494,7 +1493,6 @@ const struct file_operations xfs_file_operations = { .compat_ioctl = xfs_file_compat_ioctl, #endif .mmap = xfs_file_mmap, - .mmap_supported_flags = MAP_SYNC, .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, @@ -1502,6 +1500,8 @@ const struct file_operations xfs_file_operations = { .fallocate = xfs_file_fallocate, .fadvise = xfs_file_fadvise, .remap_file_range = xfs_file_remap_range, + .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC | + FOP_DIO_PARALLEL_WRITE, }; const struct file_operations xfs_dir_file_operations = { @@ -1514,4 +1514,6 @@ const struct file_operations xfs_dir_file_operations = { .compat_ioctl = xfs_file_compat_ioctl, #endif .fsync = xfs_dir_fsync, + .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC | + FOP_DIO_PARALLEL_WRITE, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index b6ccdceb4f94..1647ddb5ea16 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -169,9 +169,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NOREUSE ((__force fmode_t)0x800000) -/* File supports non-exclusive O_DIRECT writes from multiple threads */ -#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000) - /* File is embedded in backing_file object */ #define FMODE_BACKING ((__force fmode_t)0x2000000) @@ -187,12 +184,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) -/* File supports async buffered reads */ -#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) - -/* File supports async nowait buffered writes */ -#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000) - #ifdef CONFIG_BPF_READAHEAD /* File mode control flag, expect random access pattern */ #define FMODE_CTL_RANDOM ((__force fmode_t)0x1000) @@ -2021,8 +2012,11 @@ struct iov_iter; struct io_uring_cmd; struct offset_ctx; +typedef unsigned int __bitwise fop_flags_t; + struct file_operations { struct module *owner; + fop_flags_t fop_flags; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); @@ -2035,7 +2029,6 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); - unsigned long mmap_supported_flags; int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); @@ -2074,6 +2067,15 @@ struct file_operations { KABI_RESERVE(7) } __randomize_layout; +/* Supports async buffered reads */ +#define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0)) +/* Supports async buffered writes */ +#define FOP_BUFFER_WASYNC ((__force fop_flags_t)(1 << 1)) +/* Supports synchronous page faults for mappings */ +#define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2)) +/* Supports non-exclusive O_DIRECT writes from multiple threads */ +#define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) + /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, int (*) (struct file *, struct dir_context *)); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8b15e9dc340f..0784a853b91e 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -465,7 +465,7 @@ static void io_prep_async_work(struct io_kiocb *req) /* don't serialize this request if the fs doesn't need it */ if (should_hash && (req->file->f_flags & O_DIRECT) && - (req->file->f_mode & FMODE_DIO_PARALLEL_WRITE)) + (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE)) should_hash = false; if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL)) io_wq_hash_work(&req->work, file_inode(req->file)); diff --git a/io_uring/rw.c b/io_uring/rw.c index 75b001febb4d..79522f2784fd 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -629,7 +629,8 @@ static bool io_rw_should_retry(struct io_kiocb *req) * just use poll if we can, and don't attempt if the fs doesn't * support callback based unlocks */ - if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) + if (file_can_poll(req->file) || + !(req->file->f_op->fop_flags & FOP_BUFFER_RASYNC)) return false; wait->wait.func = io_async_buf_func; @@ -930,10 +931,10 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(!io_file_supports_nowait(req))) goto copy_iov; - /* File path supports NOWAIT for non-direct_IO only for block devices. */ + /* Check if we can support NOWAIT. */ if (!(kiocb->ki_flags & IOCB_DIRECT) && - !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) && - (req->flags & REQ_F_ISREG)) + !(req->file->f_op->fop_flags & FOP_BUFFER_WASYNC) && + (req->flags & REQ_F_ISREG)) goto copy_iov; kiocb->ki_flags |= IOCB_NOWAIT; diff --git a/mm/mmap.c b/mm/mmap.c index fb54df419ea2..64ad0e9c7560 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1352,7 +1352,9 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon if (!file_mmap_ok(file, inode, pgoff, len)) return -EOVERFLOW; - flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; + flags_mask = LEGACY_MAP_MASK; + if (file->f_op->fop_flags & FOP_MMAP_SYNC) + flags_mask |= MAP_SYNC; switch (flags & MAP_TYPE) { case MAP_SHARED: -- 2.39.2