
From: Jingbo Xu <jefflexu@linux.alibaba.com> anolis inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC6CFR CVE: NA -------------------------------- ANBZ: #9568 Sometimes file offset alignment needs to be enabled (as an opt-in) to achieve the optimum performance at the backend store. For example, when ErasureCode [1] is used at the backend store, the optimum write performance is achieved when the WRITE request is aligned with the stripe size of ErasureCode. Otherwise, a non-aligned WRITE request needs to be split at the stripe size boundary. It is quite costly to handle these split partial requests: first the whole stripe to which the split partial request belongs needs to be read out, then the read stripe buffer is overwritten with the request, and finally the whole stripe is written back to the persistent storage. Thus the backend store can suffer severe performance degradation when WRITE requests cannot fit into one stripe exactly. The write performance can be 10x slower when the request is 256KB in size given a 4MB stripe size. Also, there can be 50% performance degradation in theory if the request is not aligned to the stripe boundary. Besides, the test conducted indicates that the non-alignment issue becomes more severe when decreasing fuse's max_ratio, possibly in part because the background writeback is then more likely to run in parallel with the dirtier. fuse's max_ratio ratio of aligned WRITE requests ---------------- ------------------------------- 70 99.9% 40 74% 20 45% 10 20% With the patched version, which makes the alignment constraint opt-in when constructing WRITE requests, the ratio of aligned WRITE requests increases to 98% (previously 20%) when fuse's max_ratio is 10. [1] https://lore.kernel.org/linux-fsdevel/20240124070512.52207-1-jefflexu@linux.... 
Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com> Link: https://gitee.com/anolis/cloud-kernel/pulls/3533 Conflicts: fs/fuse/file.c fs/fuse/fuse_i.h fs/fuse/inode.c include/uapi/linux/fuse.h [Context conflict.] Signed-off-by: Wang Zhaolong <wangzhaolong1@huawei.com> --- fs/fuse/file.c | 4 ++++ fs/fuse/fuse_i.h | 6 ++++++ fs/fuse/inode.c | 10 +++++++++- include/uapi/linux/fuse.h | 2 ++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7200b176ac79..37282b8363f0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2303,10 +2303,14 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page, /* Need to grow the pages array? If so, did the expansion fail? */ if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data)) return true; + /* Reached alignment boundary */ + if (fc->write_alignment && !(page->index % fc->write_align_pages)) + return true; + return false; } static int fuse_writepages_fill(struct folio *folio, struct writeback_control *wbc, void *_data) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c9902fb877cb..7dd7471962b9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -641,10 +641,13 @@ struct fuse_conn { unsigned max_read; /** Maximum write size */ unsigned max_write; + /* Maximum number of pages that a write request should be aligned with */ + unsigned int write_align_pages; + /** Maximum number of pages that can be used in a single request */ unsigned int max_pages; /** Constrain ->max_pages to this value during feature negotiation */ unsigned int max_pages_limit; @@ -887,10 +890,13 @@ struct fuse_conn { unsigned int use_pages_for_kvec_io:1; /** Passthrough support for read/write IO */ unsigned int passthrough:1; + /* write request is aligned on max_write boundary */ + unsigned int write_alignment:1; + /** Maximum stack depth for passthrough backing files */ 
int max_stack_depth; /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a1669c498f5e..9caa80973a5c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1352,10 +1352,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, } if (flags & FUSE_NO_EXPORT_SUPPORT) fm->sb->s_export_op = &fuse_export_fid_operations; if (flags & FUSE_SEPARATE_BACKGROUND) fc->separate_background = 1; + if (flags & FUSE_WRITE_ALIGNMENT) + fc->write_alignment = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; fc->no_flock = 1; } @@ -1363,10 +1365,16 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fm->sb->s_bdi->ra_pages = min(fm->sb->s_bdi->ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; fc->max_write = max_t(unsigned, 4096, fc->max_write); + if (fc->write_alignment) { + if (fc->max_write % PAGE_SIZE) + ok = false; + else + fc->write_align_pages = fc->max_write >> PAGE_SHIFT; + } fc->conn_init = 1; } kfree(ia); if (!ok) { @@ -1400,11 +1408,11 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | - FUSE_SEPARATE_BACKGROUND; + FUSE_SEPARATE_BACKGROUND | FUSE_WRITE_ALIGNMENT; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; if (fuse_is_inode_dax_mode(fm->fc->dax_mode)) flags |= FUSE_HAS_INODE_DAX; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2a84cecf75a1..dd1809072e63 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -421,10 +421,11 @@ struct fuse_file_lock { * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_HAS_RESEND: kernel supports resending pending 
requests, and the high bit * of the request ID indicates resend requests * FUSE_SEPARATE_BACKGROUND: separate background queue for WRITE requests and * the others + * FUSE_WRITE_ALIGNMENT: write request is aligned on max_write boundary */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_FILE_OPS (1 << 2) #define FUSE_ATOMIC_O_TRUNC (1 << 3) @@ -463,10 +464,11 @@ struct fuse_file_lock { #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) #define FUSE_PASSTHROUGH (1ULL << 37) #define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) #define FUSE_HAS_RESEND (1ULL << 39) +#define FUSE_WRITE_ALIGNMENT (1ULL << 55) #define FUSE_SEPARATE_BACKGROUND (1ULL << 56) /* The 57th bit is left to FUSE_HAS_RECOVERY */ /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP -- 2.34.3