This patch introduces a new configuration option,
BPF_READAHEAD_OPTIMIZATION, which uses eBPF to implement a programmable
kernel: readahead behavior is adjusted to the application's read pattern
to optimize read performance in Spark SQL scenarios.
The changes include:
- Add CONFIG_BPF_READAHEAD_OPTIMIZATION to mm/Kconfig, which depends
on CONFIG_TRACEPOINTS.
- Add conditional compilation directives to fs/ext4/file.c, fs/read_write.c,
fs/xfs/xfs_file.c, and include/linux/fs.h to include tracepoint-related
headers and functions only when BPF_READAHEAD_OPTIMIZATION is enabled.
- Modify page_cache_sync_ra() in mm/readahead.c so that the
FMODE_CTL_RANDOM forced-readahead check and the FMODE_CTL_WILLNEED
read-from-head path are compiled in only when BPF_READAHEAD_OPTIMIZATION
is enabled.
Signed-off-by: ZhaoLong Wang <wangzhaolong1@huawei.com>
---
fs/ext4/file.c | 6 ++++++
fs/read_write.c | 2 +-
fs/xfs/xfs_file.c | 7 ++++++-
include/linux/fs.h | 8 ++++++++
mm/Kconfig | 9 +++++++++
mm/readahead.c | 10 ++++++++++
6 files changed, 40 insertions(+), 2 deletions(-)
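Note for reviewers (below the --- cut line, so not part of the commit
message): a minimal sketch of how a BPF program might drive the new
hooks. It assumes the fs_file_read tracepoint is exported as a writable
raw tracepoint whose first argument is the struct fs_file_read_ctx, and
that the context carries set/clear masks which
fs_file_read_update_args_by_trace() folds back into file->f_ctl_mode.
Apart from the two flag values, the 2 MiB READAHEAD_FIRST_SIZE
threshold, and the `index` member, every name below is an assumption
for illustration, not something this patch defines.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical consumer of a writable fs_file_read raw tracepoint. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Mirrors of the f_ctl_mode flags added to include/linux/fs.h above. */
#define FMODE_CTL_RANDOM	0x1000
#define FMODE_CTL_WILLNEED	0x400000

/* Assumed layout: only `index` is visible in this diff; check the rest
 * against the real kernel definition before using this. */
struct fs_file_read_ctx {
	long long i_size;		/* assumed: current file size */
	long long prev_index;		/* assumed: previous read's page index */
	long long index;		/* current page index (from the hunk above) */
	unsigned int set_f_ctl_mode;	/* assumed: bits to set in f_ctl_mode */
	unsigned int clr_f_ctl_mode;	/* assumed: bits to clear in f_ctl_mode */
};

SEC("raw_tracepoint.w/fs_file_read")
int tune_readahead(struct bpf_raw_tracepoint_args *ctx)
{
	struct fs_file_read_ctx *rd = (void *)ctx->args[0];

	/* Small file: request FMODE_CTL_WILLNEED so page_cache_sync_ra()
	 * pulls the head of the file (READAHEAD_FIRST_SIZE) into the
	 * page cache in one go. */
	if (rd->i_size > 0 && rd->i_size <= 2 * 1024 * 1024) {
		rd->set_f_ctl_mode = FMODE_CTL_WILLNEED;
		return 0;
	}

	/* Non-sequential jump: flag the file FMODE_CTL_RANDOM so the
	 * do_forced_ra path in page_cache_sync_ra() is taken. */
	if (rd->index != rd->prev_index + 1)
		rd->set_f_ctl_mode = FMODE_CTL_RANDOM;
	else
		rd->clr_f_ctl_mode = FMODE_CTL_RANDOM;

	return 0;
}

char _license[] SEC("license") = "GPL";

Such an object would be attached from a small libbpf loader with
bpf_program__attach_raw_tracepoint(prog, "fs_file_read"); the paired
fs_file_release tracepoint is the natural point to drop any per-file
state the program keeps in a map.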
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 324b45b51d1f..14f1441c3391 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -30,7 +30,9 @@
#include <linux/uio.h>
#include <linux/mman.h>
#include <linux/backing-dev.h>
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
#include <trace/events/fs.h>
+#endif
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -145,7 +147,9 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (iocb->ki_flags & IOCB_DIRECT)
return ext4_dio_read_iter(iocb, to);
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
fs_file_read_do_trace(iocb);
+#endif
return generic_file_read_iter(iocb, to);
}
@@ -167,7 +171,9 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
*/
static int ext4_release_file(struct inode *inode, struct file *filp)
{
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
trace_fs_file_release(inode, filp);
+#endif
if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
ext4_alloc_da_blocks(inode);
diff --git a/fs/read_write.c b/fs/read_write.c
index 3d69fb284d10..2ee9a07f7208 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1721,7 +1721,7 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
return 0;
}
-#ifdef CONFIG_TRACEPOINTS
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
static void fs_file_read_ctx_init(struct fs_file_read_ctx *ctx,
struct file *filp, loff_t pos)
{
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ecf10d3aec17..7a4bd39bc694 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -31,7 +31,9 @@
#include <linux/mman.h>
#include <linux/fadvise.h>
#include <linux/mount.h>
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
#include <trace/events/fs.h>
+#endif
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -308,8 +310,9 @@ xfs_file_buffered_read(
ssize_t ret;
trace_xfs_file_buffered_read(iocb, to);
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
fs_file_read_do_trace(iocb);
-
+#endif
ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
if (ret)
return ret;
@@ -1266,7 +1269,9 @@ xfs_file_release(
struct inode *inode,
struct file *filp)
{
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
trace_fs_file_release(inode, filp);
+#endif
return xfs_release(XFS_I(inode));
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d74314a8fa94..eb50bcbfcc24 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -43,7 +43,9 @@
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
#include <linux/tracepoint-defs.h>
+#endif
#include <linux/kabi.h>
#include <asm/byteorder.h>
@@ -190,11 +192,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File supports async nowait buffered writes */
#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
/* File mode control flag, expect random access pattern */
#define FMODE_CTL_RANDOM ((__force fmode_t)0x1000)
/* File mode control flag, will try to read head of the file into pagecache */
#define FMODE_CTL_WILLNEED ((__force fmode_t)0x400000)
+#endif
/*
* Attribute flags. These should be or-ed together to figure out what
@@ -1058,7 +1062,9 @@ struct file {
struct address_space *f_mapping;
errseq_t f_wb_err;
errseq_t f_sb_err; /* for syncfs */
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
fmode_t f_ctl_mode;
+#endif
KABI_RESERVE(1)
KABI_RESERVE(2)
} __randomize_layout
@@ -3524,6 +3530,7 @@ struct fs_file_read_ctx {
long long index;
};
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
#ifdef CONFIG_TRACEPOINTS
DECLARE_TRACEPOINT(fs_file_read);
extern void fs_file_read_update_args_by_trace(struct kiocb *iocb);
@@ -3536,4 +3543,5 @@ static inline void fs_file_read_do_trace(struct kiocb *iocb)
if (tracepoint_enabled(fs_file_read))
fs_file_read_update_args_by_trace(iocb);
}
+#endif
#endif /* _LINUX_FS_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index cdbb1ceaa554..27c4bd8238c9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1424,6 +1424,15 @@ config ETMEM
high-performance storage media to release memory space and reduce
memory costs.
+config BPF_READAHEAD_OPTIMIZATION
+ bool "Enable bpf readahead optimization"
+ depends on TRACEPOINTS
+ default n
+ help
+ eBPF is used to implement a programmable kernel. The readahead
+ behavior of the kernel is adjusted based on the application's read
+ pattern to optimize read performance in Spark SQL scenarios.
+
source "mm/damon/Kconfig"
endmenu
diff --git a/mm/readahead.c b/mm/readahead.c
index 4d0dbfd62d20..4f8e339e2d6e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -131,7 +131,9 @@
#include "internal.h"
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
#define READAHEAD_FIRST_SIZE (2 * 1024 * 1024)
+#endif
/*
* Initialise a struct file's readahead state. Assumes that the caller has
* memset *ra to zero.
@@ -669,6 +671,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
page_cache_ra_order(ractl, ra, order);
}
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
/*
* Try to read first @ra_size from head of the file.
*/
@@ -697,13 +700,18 @@ static bool page_cache_readahead_from_head(struct address_space *mapping,
}
return true;
}
+#endif
void page_cache_sync_ra(struct readahead_control *ractl,
unsigned long req_count)
{
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
bool do_forced_ra = ractl->file &&
((ractl->file->f_mode & FMODE_RANDOM) ||
(ractl->file->f_ctl_mode & FMODE_CTL_RANDOM));
+#else
+ bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
+#endif
/*
* Even if readahead is disabled, issue this request as readahead
@@ -718,11 +726,13 @@ void page_cache_sync_ra(struct readahead_control *ractl,
do_forced_ra = true;
}
+#ifdef CONFIG_BPF_READAHEAD_OPTIMIZATION
/* try to read first READAHEAD_FIRST_SIZE into pagecache */
if (ractl->file && (ractl->file->f_ctl_mode & FMODE_CTL_WILLNEED) &&
page_cache_readahead_from_head(ractl->mapping, ractl->file,
readahead_index(ractl), req_count, READAHEAD_FIRST_SIZE))
return;
+#endif
/* be dumb */
if (do_forced_ra) {
--
2.39.2