This patch series introduces a new configuration option called BPF_READAHEAD, which is designed to optimize the read performance in Spark SQL scenarios using eBPF to implement a programmable kernel.
The first patch adds the BPF_READAHEAD option to the kernel configuration, along with the necessary changes to the filesystem and memory management code to support this feature. The option is disabled by default and selects CONFIG_TRACEPOINTS.
The second patch adds the BPF_READAHEAD option, explicitly left disabled, to the openeuler_defconfig files for the arm64, powerpc, riscv, and x86 architectures. This records the option in each supported architecture's defconfig so that the feature can be enabled there when needed.
The BPF_READAHEAD option allows for fine-tuning the kernel's readahead behavior based on the application's read patterns, leading to improved read performance in specific scenarios like Spark SQL.
ZhaoLong Wang (2):
  mm, fs: Add BPF_READAHEAD build option for bpf readahead
  arch: Add BPF_READAHEAD config options for supported architectures
arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/riscv/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + fs/read_write.c | 4 ++-- include/linux/fs.h | 18 +++++++++++++----- include/trace/events/fs.h | 6 ++++++ mm/Kconfig | 9 +++++++++ 8 files changed, 34 insertions(+), 7 deletions(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7Y9JD CVE: NA
---------------------------
This patch introduces a new configuration option called BPF_READAHEAD, which is designed to optimize the read performance in Spark SQL scenarios using eBPF to implement a programmable kernel.
The changes include:
- Add CONFIG_BPF_READAHEAD to mm/Kconfig, which selects CONFIG_TRACEPOINTS.
- Add conditional compilation directives to fs/ext4/file.c, fs/read_write.c, fs/xfs/xfs_file.c, and include/linux/fs.h to include tracepoint-related headers and functions only when BPF_READAHEAD is enabled.
- Modify page_cache_sync_ra() in mm/readahead.c to disable forced readahead when BPF_READAHEAD is not enabled.
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- V2: Explicitly set CONFIG_ in openeuler_defconfig. Do not add redundant macros to mm/readahead.
V3: Place all macro isolation switches in the .h header file.
V4: Change the macro name BPF_READAHEAD_OPTIMIZATION to BPF_READAHEAD.
V5: Correcting macro name replacement mistakes.
V6: Split the original patch into two separate patches.
fs/read_write.c | 4 ++-- include/linux/fs.h | 18 +++++++++++++----- include/trace/events/fs.h | 6 ++++++ mm/Kconfig | 9 +++++++++ 4 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c index 3d69fb284d10..dd5c90675f51 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1721,7 +1721,7 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) return 0; }
-#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_BPF_READAHEAD static void fs_file_read_ctx_init(struct fs_file_read_ctx *ctx, struct file *filp, loff_t pos) { @@ -1752,7 +1752,7 @@ void fs_file_read_update_args_by_trace(struct kiocb *iocb) filp->f_ctl_mode &= ~(ctx.clr_f_mode & FS_FILE_READ_MODE_MASK); } EXPORT_SYMBOL_GPL(fs_file_read_update_args_by_trace); -#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_read); EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_release); +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index d74314a8fa94..55d2844f7fc6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,7 +43,9 @@ #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#ifdef CONFIG_BPF_READAHEAD #include <linux/tracepoint-defs.h> +#endif #include <linux/kabi.h>
#include <asm/byteorder.h> @@ -190,11 +192,16 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports async nowait buffered writes */ #define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
+#ifdef CONFIG_BPF_READAHEAD /* File mode control flag, expect random access pattern */ #define FMODE_CTL_RANDOM ((__force fmode_t)0x1000)
/* File mode control flag, will try to read head of the file into pagecache */ #define FMODE_CTL_WILLNEED ((__force fmode_t)0x400000) +#else +#define FMODE_CTL_RANDOM 0 +#define FMODE_CTL_WILLNEED 0 +#endif
/* * Attribute flags. These should be or-ed together to figure out what @@ -3524,16 +3531,17 @@ struct fs_file_read_ctx { long long index; };
-#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_BPF_READAHEAD DECLARE_TRACEPOINT(fs_file_read); extern void fs_file_read_update_args_by_trace(struct kiocb *iocb); -#else -static inline void fs_file_read_update_args_by_trace(struct kiocb *iocb) {} -#endif - static inline void fs_file_read_do_trace(struct kiocb *iocb) { if (tracepoint_enabled(fs_file_read)) fs_file_read_update_args_by_trace(iocb); } +#else +static inline void fs_file_read_update_args_by_trace(struct kiocb *iocb) {} +static inline void fs_file_read_do_trace(struct kiocb *iocb) {} +#endif + #endif /* _LINUX_FS_H */ diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h index ee82dad9d9da..801aad6cb74d 100644 --- a/include/trace/events/fs.h +++ b/include/trace/events/fs.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_BPF_READAHEAD + #undef TRACE_SYSTEM #define TRACE_SYSTEM fs
@@ -31,3 +33,7 @@ DECLARE_TRACE(fs_file_release,
/* This part must be outside protection */ #include <trace/define_trace.h> +#else +#define trace_fs_file_release(...) +#define trace_fs_file_read(...) +#endif /* CONFIG_BPF_READAHEAD */ diff --git a/mm/Kconfig b/mm/Kconfig index cdbb1ceaa554..45d4139c959c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1424,6 +1424,15 @@ config ETMEM high-performance storage media to release memory space and reduce memory costs.
+config BPF_READAHEAD + bool "Enable bpf readahead optimization" + select TRACEPOINTS + default n + help + EBPF is used to implement a programmable kernel. The readahead behavior + of the kernel is adjusted based on the application read mode to optimize + the read performance in the Spark SQL scenario, + source "mm/damon/Kconfig"
endmenu
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7Y9JD CVE: NA
---------------------------
Add CONFIG_BPF_READAHEAD (disabled by default) to the openeuler_defconfig files for arm64, powerpc, riscv, and x86 architectures.
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/riscv/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 4 files changed, 4 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 9a6ef6175717..f3ab196f00ab 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1192,6 +1192,7 @@ CONFIG_DYNAMIC_POOL=y CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y +# CONFIG_BPF_READAHEAD is not set
# # Data Access Monitoring diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index 152b941da1d5..19754209e3c8 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -860,6 +860,7 @@ CONFIG_ARCH_HAS_HUGEPD=y CONFIG_USERFAULTFD=y # CONFIG_LRU_GEN is not set CONFIG_LOCK_MM_AND_FIND_VMA=y +# CONFIG_BPF_READAHEAD is not set
# # Data Access Monitoring diff --git a/arch/riscv/configs/openeuler_defconfig b/arch/riscv/configs/openeuler_defconfig index cb132f4576da..90642dc106e2 100644 --- a/arch/riscv/configs/openeuler_defconfig +++ b/arch/riscv/configs/openeuler_defconfig @@ -825,6 +825,7 @@ CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y CONFIG_PER_VMA_LOCK=y CONFIG_LOCK_MM_AND_FIND_VMA=y # CONFIG_PAGE_CACHE_LIMIT is not set +# CONFIG_BPF_READAHEAD is not set
# # Data Access Monitoring diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 1d20beb4bb6d..f49fc3ae7713 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1179,6 +1179,7 @@ CONFIG_DYNAMIC_POOL=y CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y +# CONFIG_BPF_READAHEAD is not set
# # Data Access Monitoring
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/6590 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/L...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/6590 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/L...