hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7Y9JD CVE: NA
---------------------------
This patch introduces a new configuration option, BPF_READAHEAD, which uses eBPF to make the kernel's readahead behavior programmable, so that it can be adjusted to the application's read pattern to optimize read performance in Spark SQL scenarios.
The changes include:
- Add CONFIG_BPF_READAHEAD to mm/Kconfig, which selects CONFIG_TRACEPOINTS.
- Add conditional compilation directives to fs/ext4/file.c, fs/read_write.c, fs/xfs/xfs_file.c, and include/linux/fs.h to include tracepoint-related headers and functions only when BPF_READAHEAD is enabled.
- Modify page_cache_sync_ra() in mm/readahead.c to disable forced readahead when BPF_READAHEAD is not enabled.
V2: Explicitly set CONFIG_ in openeuler_defconfig. Do not add redundant macros to mm/readahead.
V3: Place all macro isolation switches in the .h header file.
V4: Change the macro name BPF_READAHEAD_OPTIMIZATION to BPF_READAHEAD
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/riscv/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + fs/read_write.c | 4 ++-- include/linux/fs.h | 18 +++++++++++++----- include/trace/events/fs.h | 6 ++++++ mm/Kconfig | 9 +++++++++ 8 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 9a6ef6175717..b8bdcf53c062 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1192,6 +1192,7 @@ CONFIG_DYNAMIC_POOL=y CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y +# CONFIG_BPF_READAHEAD_OPTIMIZATION is not set
# # Data Access Monitoring diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index 152b941da1d5..793cca85862c 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -860,6 +860,7 @@ CONFIG_ARCH_HAS_HUGEPD=y CONFIG_USERFAULTFD=y # CONFIG_LRU_GEN is not set CONFIG_LOCK_MM_AND_FIND_VMA=y +# CONFIG_BPF_READAHEAD_OPTIMIZATION is not set
# # Data Access Monitoring diff --git a/arch/riscv/configs/openeuler_defconfig b/arch/riscv/configs/openeuler_defconfig index cb132f4576da..06caed7e855a 100644 --- a/arch/riscv/configs/openeuler_defconfig +++ b/arch/riscv/configs/openeuler_defconfig @@ -825,6 +825,7 @@ CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y CONFIG_PER_VMA_LOCK=y CONFIG_LOCK_MM_AND_FIND_VMA=y # CONFIG_PAGE_CACHE_LIMIT is not set +# CONFIG_BPF_READAHEAD_OPTIMIZATION is not set
# # Data Access Monitoring diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 1d20beb4bb6d..3338dc1c7037 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1179,6 +1179,7 @@ CONFIG_DYNAMIC_POOL=y CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y +# CONFIG_BPF_READAHEAD_OPTIMIZATION is not set
# # Data Access Monitoring diff --git a/fs/read_write.c b/fs/read_write.c index 3d69fb284d10..265e40589be8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1721,7 +1721,7 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) return 0; }
-#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION static void fs_file_read_ctx_init(struct fs_file_read_ctx *ctx, struct file *filp, loff_t pos) { @@ -1752,7 +1752,7 @@ void fs_file_read_update_args_by_trace(struct kiocb *iocb) filp->f_ctl_mode &= ~(ctx.clr_f_mode & FS_FILE_READ_MODE_MASK); } EXPORT_SYMBOL_GPL(fs_file_read_update_args_by_trace); -#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_read); EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_release); +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index d74314a8fa94..2702d38b57ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,7 +43,9 @@ #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION #include <linux/tracepoint-defs.h> +#endif #include <linux/kabi.h>
#include <asm/byteorder.h> @@ -190,11 +192,16 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports async nowait buffered writes */ #define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION /* File mode control flag, expect random access pattern */ #define FMODE_CTL_RANDOM ((__force fmode_t)0x1000)
/* File mode control flag, will try to read head of the file into pagecache */ #define FMODE_CTL_WILLNEED ((__force fmode_t)0x400000) +#else +#define FMODE_CTL_RANDOM 0 +#define FMODE_CTL_WILLNEED 0 +#endif
/* * Attribute flags. These should be or-ed together to figure out what @@ -3524,16 +3531,17 @@ struct fs_file_read_ctx { long long index; };
-#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION DECLARE_TRACEPOINT(fs_file_read); extern void fs_file_read_update_args_by_trace(struct kiocb *iocb); -#else -static inline void fs_file_read_update_args_by_trace(struct kiocb *iocb) {} -#endif - static inline void fs_file_read_do_trace(struct kiocb *iocb) { if (tracepoint_enabled(fs_file_read)) fs_file_read_update_args_by_trace(iocb); } +#else +static inline void fs_file_read_update_args_by_trace(struct kiocb *iocb) {} +static inline void fs_file_read_do_trace(struct kiocb *iocb) {} +#endif + #endif /* _LINUX_FS_H */ diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h index ee82dad9d9da..e0e595160f44 100644 --- a/include/trace/events/fs.h +++ b/include/trace/events/fs.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION + #undef TRACE_SYSTEM #define TRACE_SYSTEM fs
@@ -31,3 +33,7 @@ DECLARE_TRACE(fs_file_release,
/* This part must be outside protection */ #include <trace/define_trace.h> +#else +#define trace_fs_file_release(...) +#define trace_fs_file_read(...) +#endif /* CONFIG_BPF_READAHEAD_OPTIMIZATION */ diff --git a/mm/Kconfig b/mm/Kconfig index cdbb1ceaa554..45d4139c959c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1424,6 +1424,15 @@ config ETMEM high-performance storage media to release memory space and reduce memory costs.
+config BPF_READAHEAD + bool "Enable bpf readahead optimization" + select TRACEPOINTS + default n + help + EBPF is used to implement a programmable kernel. The readahead behavior + of the kernel is adjusted based on the application read mode to optimize + the read performance in the Spark SQL scenario, + source "mm/damon/Kconfig"
endmenu