From: John Garry <john.g.garry@oracle.com> mainline inclusion from mainline-v6.14-rc1 commit 794ca29dcc924cd3f16d12b6fba61074c992b8fd category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID2HML CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Currently atomic write support requires dedicated HW support. This imposes a restriction on the filesystem that disk blocks need to be aligned and contiguously mapped to FS blocks to issue atomic writes. XFS has no method to guarantee FS block alignment for regular, non-RT files. As such, atomic writes are currently limited to 1x FS block there. To deal with the scenario that we are issuing an atomic write over misaligned or discontiguous data blocks - and raise the atomic write size limit - support a SW-based software emulated atomic write mode. For XFS, this SW-based atomic writes would use CoW support to issue emulated untorn writes. It is the responsibility of the FS to detect discontiguous atomic writes and switch to IOMAP_DIO_ATOMIC_SW mode and retry the write. Indeed, SW-based atomic writes could be used always when the mounted bdev does not support HW offload, but this strategy is not initially expected to be used. Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> Signed-off-by: John Garry <john.g.garry@oracle.com> Link: https://lore.kernel.org/r/20250303171120.2837067-6-john.g.garry@oracle.com Signed-off-by: Christian Brauner <brauner@kernel.org> Conflicts: include/linux/iomap.h Signed-off-by: Long Li <leo.lilong@huawei.com> --- fs/iomap/direct-io.c | 4 +++- include/linux/iomap.h | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index c5be30dbf19b..9d605cb6824f 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -628,7 +628,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomi.flags |= IOMAP_OVERWRITE_ONLY; } - if (iocb->ki_flags & IOCB_ATOMIC) + if (dio_flags & IOMAP_DIO_ATOMIC_SW) + iomi.flags |= IOMAP_ATOMIC_SW; + else if (iocb->ki_flags & IOCB_ATOMIC) iomi.flags |= IOMAP_ATOMIC_HW; /* for data sync or sync, we need sync completion processing */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4590bb88997d..e9237f5e3bfc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -178,7 +178,8 @@ struct iomap_folio_ops { #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ -#define IOMAP_ATOMIC_HW (1 << 9) +#define IOMAP_ATOMIC_HW (1 << 9) /* HW-based torn-write protection */ +#define IOMAP_ATOMIC_SW (1 << 11)/* SW-based torn-write protection */ struct iomap_ops { /* @@ -418,6 +419,11 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_PARTIAL (1 << 2) +/* + * Use software-based torn-write protection. + */ +#define IOMAP_DIO_ATOMIC_SW (1 << 3) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); -- 2.39.2