From: John Garry <john.g.garry@oracle.com> mainline inclusion from mainline-v6.15-rc2 commit 5d894321c49e61379189b0ff605f316e39cbd1e9 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID2HML CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- XFS will be able to support large atomic writes (atomic write > 1x block) in the future. This will be achieved by using different operating methods, depending on the size of the write. Specifically, a new method of operation based on FS atomic extent remapping will be supported in addition to the current HW offload-based method. The FS method will generally be appreciably slower performing than the HW-offload method. However, the FS method will typically be able to contribute to achieving a larger atomic write unit max limit. XFS will support a hybrid mode, where the HW offload method will be used when possible, i.e. HW offload is used when the length of the write is supported, and at other times FS-based atomic writes will be used. As such, there is an atomic write length at which the user may experience appreciably slower performance. Advertise this limit in a new statx field, stx_atomic_write_unit_max_opt. When zero, it means that there is no such performance boundary. Masks STATX{_ATTR}_WRITE_ATOMIC can be used to get this new field. This is ok for older kernels which don't support this new field, as they would report 0 in this field (from zeroing in cp_statx()) already. Furthermore, those older kernels don't support large atomic writes - apart from block fops, but there would be consistent performance there for atomic writes in range [unit min, unit max]. Reviewed-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Acked-by: Darrick J. 
Wong <djwong@kernel.org> Signed-off-by: John Garry <john.g.garry@oracle.com> Conflicts: block/bdev.c fs/ext4/inode.c fs/stat.c include/linux/stat.h include/uapi/linux/stat.h Signed-off-by: Long Li <leo.lilong@huawei.com> --- fs/stat.c | 6 +++++- fs/xfs/xfs_iops.c | 2 +- include/linux/fs.h | 3 ++- include/linux/stat.h | 1 + include/uapi/linux/stat.h | 6 ++++-- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/stat.c b/fs/stat.c index 9ac0947a9a4b..2ce597f9845d 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -94,13 +94,15 @@ EXPORT_SYMBOL(generic_fill_statx_attr); * @stat: Where to fill in the attribute flags * @unit_min: Minimum supported atomic write length in bytes * @unit_max: Maximum supported atomic write length in bytes + * @unit_max_opt: Optimised maximum supported atomic write length in bytes * * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from * atomic write unit_min and unit_max values. */ void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, - unsigned int unit_max) + unsigned int unit_max, + unsigned int unit_max_opt) { /* Confirm that the request type is known */ stat->result_mask |= STATX_WRITE_ATOMIC; @@ -111,6 +113,7 @@ void generic_fill_statx_atomic_writes(struct kstat *stat, if (unit_min) { stat->atomic_write_unit_min = unit_min; stat->atomic_write_unit_max = unit_max; + stat->atomic_write_unit_max_opt = unit_max_opt; /* Initially only allow 1x segment */ stat->atomic_write_segments_max = 1; @@ -684,6 +687,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; + tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? 
-EFAULT : 0; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 8c434a191a3c..28a2c17bbb96 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -571,7 +571,7 @@ xfs_report_atomic_write( if (xfs_inode_can_atomicwrite(ip)) unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize; - generic_fill_statx_atomic_writes(stat, unit_min, unit_max); + generic_fill_statx_atomic_writes(stat, unit_min, unit_max, 0); } STATIC int diff --git a/include/linux/fs.h b/include/linux/fs.h index 22c6695aad7e..bf71382ffa0a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3234,7 +3234,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, - unsigned int unit_max); + unsigned int unit_max, + unsigned int unit_max_opt); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); diff --git a/include/linux/stat.h b/include/linux/stat.h index bea9a48049c8..b06d170eef09 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -56,6 +56,7 @@ struct kstat { u64 change_cookie; u32 atomic_write_unit_min; u32 atomic_write_unit_max; + u32 atomic_write_unit_max_opt; u32 atomic_write_segments_max; KABI_RESERVE(1) diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 9ca13620c947..44f10d925cff 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -132,9 +132,11 @@ struct statx { __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ /* 0xb0 */ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + __u32 stx_dio_read_offset_align; /* File offset alignment for direct I/O reads */ + __u32 stx_atomic_write_unit_max_opt; /* Optimised max atomic 
write unit in bytes */ + __u32 __spare2[1]; /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; -- 2.39.2