This patch introduces a new configuration option, BPF_READAHEAD_OPTIMIZATION, which uses eBPF to implement a programmable kernel and optimize read performance in Spark SQL scenarios.
The changes include:
- Add CONFIG_BPF_READAHEAD_OPTIMIZATION to mm/Kconfig, which depends on CONFIG_TRACEPOINTS.
- Add conditional compilation directives to fs/ext4/file.c, fs/read_write.c, fs/xfs/xfs_file.c, and include/linux/fs.h so that the tracepoint-related headers and hooks are included only when BPF_READAHEAD_OPTIMIZATION is enabled (the resulting call-site pattern is sketched after this list).
- Modify page_cache_sync_ra() in mm/readahead.c so that the forced readahead driven by f_ctl_mode and the head-of-file readahead are compiled out when BPF_READAHEAD_OPTIMIZATION is not enabled.
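For illustration only, the following minimal sketch shows the call-site pattern that the conditional compilation produces in the buffered-read paths. example_file_read_iter() is a hypothetical name; fs_file_read_do_trace() and generic_file_read_iter() are the helpers the real hunks use:

/* Hypothetical read_iter mirroring the ext4/xfs hunks: with
 * CONFIG_BPF_READAHEAD_OPTIMIZATION enabled and the fs_file_read
 * tracepoint active, an attached eBPF program can update the file's
 * readahead hints (f_ctl_mode) before the generic read path runs;
 * otherwise the hook compiles away entirely.
 */
static ssize_t example_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
        fs_file_read_do_trace(iocb);
#endif
        return generic_file_read_iter(iocb, to);
}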
Signed-off-by: ZhaoLong Wang <wangzhaolong1@huawei.com>
---
 fs/ext4/file.c     |  6 ++++++
 fs/read_write.c    |  2 +-
 fs/xfs/xfs_file.c  |  7 ++++++-
 include/linux/fs.h |  8 ++++++++
 mm/Kconfig         |  9 +++++++++
 mm/readahead.c     | 10 ++++++++++
 6 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 324b45b51d1f..14f1441c3391 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -30,7 +30,9 @@
 #include <linux/uio.h>
 #include <linux/mman.h>
 #include <linux/backing-dev.h>
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 #include <trace/events/fs.h>
+#endif
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -145,7 +147,9 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
         if (iocb->ki_flags & IOCB_DIRECT)
                 return ext4_dio_read_iter(iocb, to);
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
         fs_file_read_do_trace(iocb);
+#endif
         return generic_file_read_iter(iocb, to);
 }
@@ -167,7 +171,9 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
  */
 static int ext4_release_file(struct inode *inode, struct file *filp)
 {
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
         trace_fs_file_release(inode, filp);
+#endif
         if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
                 ext4_alloc_da_blocks(inode);
diff --git a/fs/read_write.c b/fs/read_write.c
index 3d69fb284d10..2ee9a07f7208 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1721,7 +1721,7 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
         return 0;
 }
-#ifdef CONFIG_TRACEPOINTS
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 static void fs_file_read_ctx_init(struct fs_file_read_ctx *ctx,
                                   struct file *filp, loff_t pos)
 {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ecf10d3aec17..7a4bd39bc694 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -31,7 +31,9 @@
 #include <linux/mman.h>
 #include <linux/fadvise.h>
 #include <linux/mount.h>
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 #include <trace/events/fs.h>
+#endif
 static const struct vm_operations_struct xfs_file_vm_ops;
@@ -308,8 +310,9 @@ xfs_file_buffered_read(
         ssize_t ret;
         trace_xfs_file_buffered_read(iocb, to);
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
         fs_file_read_do_trace(iocb);
-
+#endif
         ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
         if (ret)
                 return ret;
@@ -1266,7 +1269,9 @@ xfs_file_release(
         struct inode *inode,
         struct file *filp)
 {
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
         trace_fs_file_release(inode, filp);
+#endif
         return xfs_release(XFS_I(inode));
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d74314a8fa94..eb50bcbfcc24 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -43,7 +43,9 @@
 #include <linux/cred.h>
 #include <linux/mnt_idmapping.h>
 #include <linux/slab.h>
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 #include <linux/tracepoint-defs.h>
+#endif
 #include <linux/kabi.h>
 #include <asm/byteorder.h>
@@ -190,11 +192,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File supports async nowait buffered writes */
 #define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 /* File mode control flag, expect random access pattern */
 #define FMODE_CTL_RANDOM ((__force fmode_t)0x1000)
 /* File mode control flag, will try to read head of the file into pagecache */
 #define FMODE_CTL_WILLNEED ((__force fmode_t)0x400000)
+#endif
 /*
  * Attribute flags. These should be or-ed together to figure out what
@@ -1058,7 +1062,9 @@ struct file {
         struct address_space *f_mapping;
         errseq_t f_wb_err;
         errseq_t f_sb_err; /* for syncfs */
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
         fmode_t f_ctl_mode;
+#endif
         KABI_RESERVE(1)
         KABI_RESERVE(2)
 } __randomize_layout
@@ -3524,6 +3530,7 @@ struct fs_file_read_ctx {
         long long index;
 };
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 #ifdef CONFIG_TRACEPOINTS
 DECLARE_TRACEPOINT(fs_file_read);
 extern void fs_file_read_update_args_by_trace(struct kiocb *iocb);
@@ -3536,4 +3543,5 @@ static inline void fs_file_read_do_trace(struct kiocb *iocb)
         if (tracepoint_enabled(fs_file_read))
                 fs_file_read_update_args_by_trace(iocb);
 }
+#endif
 #endif /* _LINUX_FS_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index cdbb1ceaa554..27c4bd8238c9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1424,6 +1424,15 @@ config ETMEM
           high-performance storage media to release memory space and
           reduce memory costs.
+config BPF_READAHEAD_OPTIMIZATION
+        bool "Enable bpf readahead optimization"
+        depends on TRACEPOINTS
+        default n
+        help
+          eBPF is used to implement a programmable kernel. The readahead
+          behavior of the kernel is adjusted based on the application's read
+          mode to optimize read performance in the Spark SQL scenario.
+
 source "mm/damon/Kconfig"
 endmenu
diff --git a/mm/readahead.c b/mm/readahead.c
index 4d0dbfd62d20..4f8e339e2d6e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -131,7 +131,9 @@
 #include "internal.h"
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 #define READAHEAD_FIRST_SIZE (2 * 1024 * 1024)
+#endif
 /*
  * Initialise a struct file's readahead state. Assumes that the caller has
  * memset *ra to zero.
@@ -669,6 +671,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
         page_cache_ra_order(ractl, ra, order);
 }
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
 /*
  * Try to read first @ra_size from head of the file.
  */
@@ -697,13 +700,18 @@ static bool page_cache_readahead_from_head(struct address_space *mapping,
         }
         return true;
 }
+#endif
 void page_cache_sync_ra(struct readahead_control *ractl,
                 unsigned long req_count)
 {
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
         bool do_forced_ra = ractl->file &&
                         ((ractl->file->f_mode & FMODE_RANDOM) ||
                          (ractl->file->f_ctl_mode & FMODE_CTL_RANDOM));
+#else
+        bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
+#endif
         /*
          * Even if readahead is disabled, issue this request as readahead
@@ -718,11 +726,13 @@ void page_cache_sync_ra(struct readahead_control *ractl,
                 do_forced_ra = true;
         }
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
         /* try to read first READAHEAD_FIRST_SIZE into pagecache */
         if (ractl->file && (ractl->file->f_ctl_mode & FMODE_CTL_WILLNEED) &&
             page_cache_readahead_from_head(ractl->mapping, ractl->file,
                         readahead_index(ractl), req_count, READAHEAD_FIRST_SIZE))
                 return;
+#endif
         /* be dumb */
         if (do_forced_ra) {
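As a reading aid only (not part of the patch), the readahead decision that page_cache_sync_ra() ends up making with CONFIG_BPF_READAHEAD_OPTIMIZATION=y can be summarized by the sketch below. readahead_policy() and the ra_policy enum are hypothetical names introduced here; FMODE_RANDOM, FMODE_CTL_RANDOM, FMODE_CTL_WILLNEED and the 2 MiB READAHEAD_FIRST_SIZE come from the hunks above:

/* Illustrative-only summary; assumes CONFIG_BPF_READAHEAD_OPTIMIZATION=y
 * so that struct file has f_ctl_mode. The helper and enum names are
 * hypothetical; the flag checks mirror page_cache_sync_ra() above.
 */
enum ra_policy { RA_NORMAL, RA_FORCED, RA_READ_FILE_HEAD };

static enum ra_policy readahead_policy(const struct file *file)
{
        if (!file)
                return RA_NORMAL;
        /* eBPF marked the file: readahead its first READAHEAD_FIRST_SIZE
         * (2 MiB) into the page cache and stop there. */
        if (file->f_ctl_mode & FMODE_CTL_WILLNEED)
                return RA_READ_FILE_HEAD;
        /* Random access, via the classic hint or the eBPF-set control bit:
         * force a plain read limited to the requested range. */
        if ((file->f_mode & FMODE_RANDOM) ||
            (file->f_ctl_mode & FMODE_CTL_RANDOM))
                return RA_FORCED;
        return RA_NORMAL;
}

When the option is disabled, only the FMODE_RANDOM branch remains, matching the #else path in the hunk above.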
Feedback: The patch(es) you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/5735 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/6...