CONFIG: CONFIG_DEBUG_INFO_BTF=y CONFIG_BPF_SYSCALL=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y
V2: - Using helpers function, such as i_size_read(), round_up - Fix commit log Signature - Fix the user-mode test program
Hou Tao (5): vfs: add bare tracepoints for vfs read and release fs: add helper fs_file_read_do_trace() xfs: add trace for read and release of regular file ext4: add trace for the read and release of regular file selftests/bpf: add demo for file read pattern detection
Yufen Yu (1): readahead: introduce FMODE_CTL_WILLNEED to read first 2MB of file
ZhaoLong Wang (2): VFS: Rolling Back the fmode macro definition and structure members selftests/bpf: Update the demo file_read_pattern to run on libbpf 1.0+
fs/ext4/file.c | 4 + fs/read_write.c | 38 +++++ fs/xfs/xfs_file.c | 3 + include/linux/fs.h | 37 +++++ include/trace/events/fs.h | 33 +++++ mm/readahead.c | 40 ++++- tools/testing/selftests/bpf/Makefile | 1 + .../testing/selftests/bpf/file_read_pattern.c | 76 ++++++++++ .../bpf/progs/file_read_pattern_prog.c | 137 ++++++++++++++++++ 9 files changed, 368 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/fs.h create mode 100644 tools/testing/selftests/bpf/file_read_pattern.c create mode 100644 tools/testing/selftests/bpf/progs/file_read_pattern_prog.c
From: Yufen Yu yuyufen@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7Y9JD CVE: NA
-------------------------------------------------
In some scenario, likely spark-sql, almost all meta file's size is less then 2MB and applications read these smaller files in random mode. That means, it may issue multiple times random io to rotate disk, which can cause performance degradation.
To improve the small files random read, we try to read the first 2MB into pagecache on the first time of read. Then it can avoid multiple random io.
In fact, applications can call fadvise system with POSIX_FADV_WILLNEED to achieve this goal. But, some apps may cannot easily do that. So, we provide a new file flag FMODE_CTL_WILLNEED.
Signed-off-by: Yufen Yu yuyufen@huawei.com
Conflicts: include/linux/fs.h mm/readahead.c
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- include/linux/fs.h | 7 +++++++ mm/readahead.c | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h index fb5accebdcdf..28b7634a6264 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -185,6 +185,12 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports async nowait buffered writes */ #define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
+/* File mode control flag, expect random access pattern */ +#define FMODE_CTL_RANDOM ((__force fmode_t)0x1) + +/* File mode control flag, will try to read head of the file into pagecache */ +#define FMODE_CTL_WILLNEED ((__force fmode_t)0x2) + /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -1027,6 +1033,7 @@ struct file { struct address_space *f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; /* for syncfs */ + fmode_t f_ctl_mode; } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
diff --git a/mm/readahead.c b/mm/readahead.c index 6925e6959fd3..a1add9a4f77b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -131,6 +131,7 @@
#include "internal.h"
+#define READAHEAD_FIRST_SIZE (2 * 1024 * 1024) /* * Initialise a struct file's readahead state. Assumes that the caller has * memset *ra to zero. @@ -668,10 +669,41 @@ static void ondemand_readahead(struct readahead_control *ractl, page_cache_ra_order(ractl, ra, order); }
+/* + * Try to read first @ra_size from head of the file. + */ +static bool page_cache_readahead_from_head(struct address_space *mapping, + struct file *filp, pgoff_t offset, + unsigned long req_size, + unsigned long ra_size) +{ + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); + struct file_ra_state *ra = &filp->f_ra; + unsigned long size = min_t(unsigned long, ra_size, + i_size_read(file_inode(filp))); + unsigned long nrpages = round_up(size, PAGE_SIZE); + unsigned long max_pages; + unsigned int offs = 0; + + /* Cannot read date over target size, back to normal way */ + if (offset + req_size > nrpages) + return false; + + max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); + max_pages = min(max_pages, nrpages); + while (offs < nrpages) { + force_page_cache_readahead(mapping, filp, offs, max_pages); + offs += max_pages; + } + return true; +} + void page_cache_sync_ra(struct readahead_control *ractl, unsigned long req_count) { - bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM); + bool do_forced_ra = ractl->file && + ((ractl->file->f_mode & FMODE_RANDOM) || + (ractl->file->f_ctl_mode & FMODE_CTL_RANDOM));
/* * Even if readahead is disabled, issue this request as readahead @@ -686,6 +718,12 @@ void page_cache_sync_ra(struct readahead_control *ractl, do_forced_ra = true; }
+ /* try to read first READAHEAD_FIRST_SIZE into pagecache */ + if (ractl->file && (ractl->file->f_ctl_mode & FMODE_CTL_WILLNEED) && + page_cache_readahead_from_head(ractl->mapping, ractl->file, + readahead_index(ractl), req_count, READAHEAD_FIRST_SIZE)) + return; + /* be dumb */ if (do_forced_ra) { force_page_cache_ra(ractl, req_count);
From: Hou Tao houtao1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8O4U8 CVE: NA
---------------------------
Add a writable bare tracepoint fs_file_read() and a bare tracepoint fs_file_release().
A version field is added to fs_file_read() to support extension of fs_file_read_ctx in future.
These two tracepoints need to be exported and will be used by filesystem kernel module.
Signed-off-by: Hou Tao houtao1@huawei.com
Conflicts: include/linux/fs.h
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- fs/read_write.c | 5 +++++ include/linux/fs.h | 17 +++++++++++++++++ include/trace/events/fs.h | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 include/trace/events/fs.h
diff --git a/fs/read_write.c b/fs/read_write.c index 4771701c896b..e86d268d3ec8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -24,6 +24,8 @@
#include <linux/uaccess.h> #include <asm/unistd.h> +#define CREATE_TRACE_POINTS +#include <trace/events/fs.h>
const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, @@ -1718,3 +1720,6 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
return 0; } + +EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_release); diff --git a/include/linux/fs.h b/include/linux/fs.h index 28b7634a6264..b4e003026fe5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3386,4 +3386,21 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice);
+struct fs_file_read_ctx { + const unsigned char *name; + unsigned int f_ctl_mode; + unsigned int rsvd; + /* clear from f_ctl_mode */ + unsigned int clr_f_ctl_mode; + /* set into f_ctl_mode */ + unsigned int set_f_ctl_mode; + unsigned long key; + /* file size */ + long long i_size; + /* previous page index */ + long long prev_index; + /* current page index */ + long long index; +}; + #endif /* _LINUX_FS_H */ diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h new file mode 100644 index 000000000000..ee82dad9d9da --- /dev/null +++ b/include/trace/events/fs.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fs + +#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FS_H + +#include <linux/types.h> +#include <linux/tracepoint.h> +#include <linux/fs.h> + +#undef FS_DECLARE_TRACE +#ifdef DECLARE_TRACE_WRITABLE +#define FS_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size) +#else +#define FS_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#endif + +FS_DECLARE_TRACE(fs_file_read, + TP_PROTO(struct fs_file_read_ctx *ctx, int version), + TP_ARGS(ctx, version), + sizeof(struct fs_file_read_ctx)); + +DECLARE_TRACE(fs_file_release, + TP_PROTO(struct inode *inode, struct file *filp), + TP_ARGS(inode, filp)); + +#endif /* _TRACE_FS_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h>
From: Hou Tao houtao1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8O4U8 CVE: NA
---------------------------
fs_file_read_do_trace() uses writable-tracepoint to update f_mode for file read procedure. Also export it to make it being usable for filesystem kernel module.
Signed-off-by: Hou Tao houtao1@huawei.com
Conflicts: include/linux/fs.h
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- fs/read_write.c | 33 +++++++++++++++++++++++++++++++++ include/linux/fs.h | 13 +++++++++++++ 2 files changed, 46 insertions(+)
diff --git a/fs/read_write.c b/fs/read_write.c index e86d268d3ec8..c22965634f54 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1721,5 +1721,38 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) return 0; }
+#ifdef CONFIG_TRACEPOINTS +static void fs_file_read_ctx_init(struct fs_file_read_ctx *ctx, + struct file *filp, loff_t pos) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->name = file_dentry(filp)->d_name.name; + ctx->f_ctl_mode = filp->f_ctl_mode; + ctx->key = (unsigned long)filp; + ctx->i_size = i_size_read(file_inode(filp)); + ctx->prev_index = filp->f_ra.prev_pos >> PAGE_SHIFT; + ctx->index = pos >> PAGE_SHIFT; +} + +#define FS_FILE_READ_VERSION 1 +#define FS_FILE_READ_MODE_MASK (FMODE_CTL_RANDOM | FMODE_CTL_WILLNEED) + +void fs_file_read_update_args_by_trace(struct kiocb *iocb) +{ + struct file *filp = iocb->ki_filp; + struct fs_file_read_ctx ctx; + + fs_file_read_ctx_init(&ctx, filp, iocb->ki_pos); + trace_fs_file_read(&ctx, FS_FILE_READ_VERSION); + + if (!ctx.set_f_ctl_mode && !ctx.clr_f_ctl_mode) + return; + + filp->f_ctl_mode |= ctx.set_f_ctl_mode & FS_FILE_READ_MODE_MASK; + filp->f_ctl_mode &= ~(ctx.clr_f_ctl_mode & FS_FILE_READ_MODE_MASK); +} +EXPORT_SYMBOL_GPL(fs_file_read_update_args_by_trace); +#endif + EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_read); EXPORT_TRACEPOINT_SYMBOL_GPL(fs_file_release); diff --git a/include/linux/fs.h b/include/linux/fs.h index b4e003026fe5..839f2a6dacd6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,7 @@ #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#include <linux/tracepoint-defs.h>
#include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -3403,4 +3404,16 @@ struct fs_file_read_ctx { long long index; };
+#ifdef CONFIG_TRACEPOINTS +DECLARE_TRACEPOINT(fs_file_read); +extern void fs_file_read_update_args_by_trace(struct kiocb *iocb); +#else +static inline void fs_file_read_update_args_by_trace(struct kiocb *iocb) {} +#endif + +static inline void fs_file_read_do_trace(struct kiocb *iocb) +{ + if (tracepoint_enabled(fs_file_read)) + fs_file_read_update_args_by_trace(iocb); +} #endif /* _LINUX_FS_H */
From: Hou Tao houtao1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8O4U8 CVE: NA
---------------------------
Use fs_file_read_do_trace() and trace_fs_file_release() to do that.
Signed-off-by: Hou Tao houtao1@huawei.com
Conflicts: fs/xfs/xfs_file.c
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- fs/xfs/xfs_file.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 203700278ddb..5dd596555325 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -31,6 +31,7 @@ #include <linux/mman.h> #include <linux/fadvise.h> #include <linux/mount.h> +#include <trace/events/fs.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -270,6 +271,7 @@ xfs_file_buffered_read( ssize_t ret;
trace_xfs_file_buffered_read(iocb, to); + fs_file_read_do_trace(iocb);
ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED); if (ret) @@ -1227,6 +1229,7 @@ xfs_file_release( struct inode *inode, struct file *filp) { + trace_fs_file_release(inode, filp); return xfs_release(XFS_I(inode)); }
From: Hou Tao houtao1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8O4U8 CVE: NA
---------------------------
Use fs_file_read_do_trace() and trace_fs_file_release() to do that.
Signed-off-by: Hou Tao houtao1@huawei.com Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- fs/ext4/file.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 0166bb9ca160..33f8eb715b41 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -30,6 +30,7 @@ #include <linux/uio.h> #include <linux/mman.h> #include <linux/backing-dev.h> +#include <trace/events/fs.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" @@ -144,6 +145,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (iocb->ki_flags & IOCB_DIRECT) return ext4_dio_read_iter(iocb, to);
+ fs_file_read_do_trace(iocb); return generic_file_read_iter(iocb, to); }
@@ -165,6 +167,8 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos, */ static int ext4_release_file(struct inode *inode, struct file *filp) { + trace_fs_file_release(inode, filp); + if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { ext4_alloc_da_blocks(inode); ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
From: Hou Tao houtao1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8O4U8 CVE: NA
---------------------------
It attaches eBPF program into fs_file_read() and fs_file_release() respectively. The program for fs_file_read() will record read history, calculate read pattern and set f_mode for specific file, And program for fs_file_release() will clean the saved read history.
Signed-off-by: Hou Tao houtao1@huawei.com
Conflicts: tools/testing/selftests/bpf/Makefile
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- tools/testing/selftests/bpf/Makefile | 1 + .../testing/selftests/bpf/file_read_pattern.c | 73 +++++++++ .../bpf/progs/file_read_pattern_prog.c | 138 ++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 tools/testing/selftests/bpf/file_read_pattern.c create mode 100644 tools/testing/selftests/bpf/progs/file_read_pattern_prog.c
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index caede9b574cb..c521fb5928c4 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -50,6 +50,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_cgroup_storage \ test_tcpnotify_user test_sysctl \ test_progs-no_alu32 +TEST_GEN_PROGS += file_read_pattern TEST_INST_SUBDIRS := no_alu32
# Also test bpf-gcc, if present diff --git a/tools/testing/selftests/bpf/file_read_pattern.c b/tools/testing/selftests/bpf/file_read_pattern.c new file mode 100644 index 000000000000..81e3a49f0424 --- /dev/null +++ b/tools/testing/selftests/bpf/file_read_pattern.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <linux/err.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" + +#define READ_TP_NAME "fs_file_read" +#define RELEASE_TP_NAME "fs_file_release" + +int main(int argc, char *argv[]) +{ + const char *name = "./file_read_pattern_prog.o"; + struct bpf_object *obj; + const char *prog_name; + struct bpf_program *prog; + int unused; + int err; + int read_fd; + int release_fd; + + err = bpf_prog_load(name, BPF_PROG_TYPE_UNSPEC, &obj, &unused); + if (err) { + printf("Failed to load program\n"); + return err; + } + + prog_name = "raw_tracepoint.w/" READ_TP_NAME; + prog = bpf_object__find_program_by_title(obj, prog_name); + if (!prog) { + printf("no prog %s\n", prog_name); + err = -EINVAL; + goto out; + } + + read_fd = bpf_raw_tracepoint_open(READ_TP_NAME, bpf_program__fd(prog)); + if (read_fd < 0) { + err = -errno; + printf("Failed to attach raw tracepoint %s\n", READ_TP_NAME); + goto out; + } + + prog_name = "raw_tracepoint/" RELEASE_TP_NAME; + prog = bpf_object__find_program_by_title(obj, prog_name); + if (!prog) { + printf("no prog %s\n", prog_name); + err = -EINVAL; + goto out; + } + + release_fd = bpf_raw_tracepoint_open(RELEASE_TP_NAME, + bpf_program__fd(prog)); + if (release_fd < 0) { + err = -errno; + printf("Failed to attach raw tracepoint %s\n", RELEASE_TP_NAME); + goto out; + } + + pause(); + + close(release_fd); + close(read_fd); +out: + bpf_object__close(obj); + return err; +} diff --git a/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c b/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c new file mode 100644 index 000000000000..834a68add142 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +/* Need to keep consistent with definitions in include/linux/fs.h */ +#define FMODE_CTL_RANDOM 0x1 +#define FMODE_CTL_WILLNEED 0x2 + +struct fs_file_read_ctx { + const unsigned char *name; + unsigned int f_ctl_mode; + unsigned int rsvd; + /* clear from f_ctl_mode */ + unsigned int clr_f_ctl_mode; + /* set into f_ctl_mode */ + unsigned int set_f_ctl_mode; + unsigned long key; + /* file size */ + long long i_size; + /* previous page index */ + long long prev_index; + /* current page index */ + long long index; +}; + +struct fs_file_read_args { + struct fs_file_read_ctx *ctx; + int version; +}; + +struct fs_file_release_args { + void *inode; + void *filp; +}; + +struct file_rd_hist { + __u64 last_nsec; + __u32 seq_nr; + __u32 tot_nr; +}; + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(long), + .value_size = sizeof(struct file_rd_hist), + .max_entries = 10000, +}; + +static bool is_expected_file(void *name) +{ + char prefix[5]; + int err; + + err = bpf_probe_read_str(&prefix, sizeof(prefix), name); + if (err <= 0) + return false; + return !strncmp(prefix, "blk_", 4); +} + +SEC("raw_tracepoint.w/fs_file_read") +int fs_file_read(struct fs_file_read_args *args) +{ + const char fmt[] = "elapsed %llu, seq %u, tot %u\n"; + struct fs_file_read_ctx *rd_ctx = args->ctx; + struct file_rd_hist *hist; + struct file_rd_hist new_hist; + __u64 key; + __u64 now; + bool first; + + if (!is_expected_file((void *)rd_ctx->name)) + return 0; + + if (rd_ctx->i_size <= (4 << 20)) { + rd_ctx->set_f_ctl_mode = FMODE_CTL_WILLNEED; + return 0; + } + + first = false; + now = bpf_ktime_get_ns(); + key = rd_ctx->key; + hist = bpf_map_lookup_elem(&htab, &key); + if (!hist) { + __builtin_memset(&new_hist, 0, sizeof(new_hist)); + new_hist.last_nsec = now; + first = true; + hist = &new_hist; + } + + if (rd_ctx->index >= rd_ctx->prev_index && + rd_ctx->index - rd_ctx->prev_index <= 1) + hist->seq_nr += 1; + hist->tot_nr += 1; + + bpf_trace_printk(fmt, sizeof(fmt), now - hist->last_nsec, + hist->seq_nr, hist->tot_nr); + + if (first) { + bpf_map_update_elem(&htab, &key, hist, 0); + return 0; + } + + /* 500ms or 10 read */ + if (now - hist->last_nsec >= 500000000ULL || hist->tot_nr >= 10) { + if (hist->tot_nr >= 10) { + if (hist->seq_nr <= hist->tot_nr * 3 / 10) + rd_ctx->set_f_ctl_mode = FMODE_CTL_RANDOM; + else if (hist->seq_nr >= hist->tot_nr * 7 / 10) + rd_ctx->clr_f_ctl_mode = FMODE_CTL_RANDOM; + } + + hist->last_nsec = now; + hist->tot_nr = 0; + hist->seq_nr = 0; + } + + return 0; +} + +SEC("raw_tracepoint/fs_file_release") +int fs_file_release(struct fs_file_release_args *args) +{ + __u64 key = (unsigned long)args->filp; + void *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (value) + bpf_map_delete_elem(&htab, &key); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1;
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8O4U8 CVE: NA
-----------------------------------------
The readahead feature of the openEuler-22.03-LTS ebpf enhancement involves interface changes and needs to be compatible with the ebpf tool of openEuler-1.0-LTS. This patch changes the _ctl_mode to _mode of fs_file_read_ctx structure.
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com Reviewed-by: Zhihao Cheng chengzhihao1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/read_write.c | 8 ++++---- include/linux/fs.h | 10 +++++----- .../selftests/bpf/progs/file_read_pattern_prog.c | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c index c22965634f54..3d69fb284d10 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1727,7 +1727,7 @@ static void fs_file_read_ctx_init(struct fs_file_read_ctx *ctx, { memset(ctx, 0, sizeof(*ctx)); ctx->name = file_dentry(filp)->d_name.name; - ctx->f_ctl_mode = filp->f_ctl_mode; + ctx->f_mode = filp->f_mode; ctx->key = (unsigned long)filp; ctx->i_size = i_size_read(file_inode(filp)); ctx->prev_index = filp->f_ra.prev_pos >> PAGE_SHIFT; @@ -1745,11 +1745,11 @@ void fs_file_read_update_args_by_trace(struct kiocb *iocb) fs_file_read_ctx_init(&ctx, filp, iocb->ki_pos); trace_fs_file_read(&ctx, FS_FILE_READ_VERSION);
- if (!ctx.set_f_ctl_mode && !ctx.clr_f_ctl_mode) + if (!ctx.set_f_mode && !ctx.clr_f_mode) return;
- filp->f_ctl_mode |= ctx.set_f_ctl_mode & FS_FILE_READ_MODE_MASK; - filp->f_ctl_mode &= ~(ctx.clr_f_ctl_mode & FS_FILE_READ_MODE_MASK); + filp->f_ctl_mode |= ctx.set_f_mode & FS_FILE_READ_MODE_MASK; + filp->f_ctl_mode &= ~(ctx.clr_f_mode & FS_FILE_READ_MODE_MASK); } EXPORT_SYMBOL_GPL(fs_file_read_update_args_by_trace); #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 839f2a6dacd6..d47063a79bcd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -187,10 +187,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
/* File mode control flag, expect random access pattern */ -#define FMODE_CTL_RANDOM ((__force fmode_t)0x1) +#define FMODE_CTL_RANDOM ((__force fmode_t)0x1000)
/* File mode control flag, will try to read head of the file into pagecache */ -#define FMODE_CTL_WILLNEED ((__force fmode_t)0x2) +#define FMODE_CTL_WILLNEED ((__force fmode_t)0x400000)
/* * Attribute flags. These should be or-ed together to figure out what @@ -3389,12 +3389,12 @@ extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
struct fs_file_read_ctx { const unsigned char *name; - unsigned int f_ctl_mode; + unsigned int f_mode; unsigned int rsvd; /* clear from f_ctl_mode */ - unsigned int clr_f_ctl_mode; + unsigned int clr_f_mode; /* set into f_ctl_mode */ - unsigned int set_f_ctl_mode; + unsigned int set_f_mode; unsigned long key; /* file size */ long long i_size; diff --git a/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c b/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c index 834a68add142..17c47ed63531 100644 --- a/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c +++ b/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c @@ -7,17 +7,17 @@ #include <bpf/bpf_helpers.h>
/* Need to keep consistent with definitions in include/linux/fs.h */ -#define FMODE_CTL_RANDOM 0x1 -#define FMODE_CTL_WILLNEED 0x2 +#define FMODE_CTL_RANDOM 0x1000 +#define FMODE_CTL_WILLNEED 0x400000
struct fs_file_read_ctx { const unsigned char *name; - unsigned int f_ctl_mode; + unsigned int f_mode; unsigned int rsvd; /* clear from f_ctl_mode */ - unsigned int clr_f_ctl_mode; + unsigned int clr_f_mode; /* set into f_ctl_mode */ - unsigned int set_f_ctl_mode; + unsigned int set_f_mode; unsigned long key; /* file size */ long long i_size; @@ -76,7 +76,7 @@ int fs_file_read(struct fs_file_read_args *args) return 0;
if (rd_ctx->i_size <= (4 << 20)) { - rd_ctx->set_f_ctl_mode = FMODE_CTL_WILLNEED; + rd_ctx->set_f_mode = FMODE_CTL_WILLNEED; return 0; }
@@ -108,9 +108,9 @@ int fs_file_read(struct fs_file_read_args *args) if (now - hist->last_nsec >= 500000000ULL || hist->tot_nr >= 10) { if (hist->tot_nr >= 10) { if (hist->seq_nr <= hist->tot_nr * 3 / 10) - rd_ctx->set_f_ctl_mode = FMODE_CTL_RANDOM; + rd_ctx->set_f_mode = FMODE_CTL_RANDOM; else if (hist->seq_nr >= hist->tot_nr * 7 / 10) - rd_ctx->clr_f_ctl_mode = FMODE_CTL_RANDOM; + rd_ctx->clr_f_mode = FMODE_CTL_RANDOM; }
hist->last_nsec = now;
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8O4U8 CVE: NA
-----------------------------------------
Some APIs are removed and some APIs are changed from libbpf 1.0 or later. The purpose of this patch is to enable the demo to run successfully on libbpf 1.0+.
For details about API changes, see the following link: https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com --- .../testing/selftests/bpf/file_read_pattern.c | 41 ++++++++++--------- .../bpf/progs/file_read_pattern_prog.c | 13 +++--- 2 files changed, 28 insertions(+), 26 deletions(-)
diff --git a/tools/testing/selftests/bpf/file_read_pattern.c b/tools/testing/selftests/bpf/file_read_pattern.c index 81e3a49f0424..5ee9e1a18ae5 100644 --- a/tools/testing/selftests/bpf/file_read_pattern.c +++ b/tools/testing/selftests/bpf/file_read_pattern.c @@ -10,56 +10,59 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h>
-#include "bpf_rlimit.h" - #define READ_TP_NAME "fs_file_read" #define RELEASE_TP_NAME "fs_file_release"
int main(int argc, char *argv[]) { - const char *name = "./file_read_pattern_prog.o"; + const char *name = "./file_read_pattern_prog.bpf.o"; struct bpf_object *obj; - const char *prog_name; struct bpf_program *prog; - int unused; - int err; + int err = 0; int read_fd; int release_fd;
- err = bpf_prog_load(name, BPF_PROG_TYPE_UNSPEC, &obj, &unused); - if (err) { - printf("Failed to load program\n"); + obj = bpf_object__open_file(name, NULL); + if (!obj) { + printf("Failed to open program: %s\n", name); + err = -errno; return err; }
- prog_name = "raw_tracepoint.w/" READ_TP_NAME; - prog = bpf_object__find_program_by_title(obj, prog_name); + err = bpf_object__load(obj); + if (err) { + printf("failed to load program: %s\n", name); + goto out; + } + + prog = bpf_object__find_program_by_name(obj, READ_TP_NAME); if (!prog) { - printf("no prog %s\n", prog_name); - err = -EINVAL; + printf("no prog %s\n", READ_TP_NAME); + err = -errno; goto out; }
read_fd = bpf_raw_tracepoint_open(READ_TP_NAME, bpf_program__fd(prog)); if (read_fd < 0) { - err = -errno; printf("Failed to attach raw tracepoint %s\n", READ_TP_NAME); + err = read_fd; goto out; }
- prog_name = "raw_tracepoint/" RELEASE_TP_NAME; - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, RELEASE_TP_NAME); if (!prog) { - printf("no prog %s\n", prog_name); - err = -EINVAL; + printf("no prog %s\n", RELEASE_TP_NAME); + err = -errno; + close(read_fd); goto out; }
release_fd = bpf_raw_tracepoint_open(RELEASE_TP_NAME, bpf_program__fd(prog)); if (release_fd < 0) { - err = -errno; printf("Failed to attach raw tracepoint %s\n", RELEASE_TP_NAME); + err = release_fd; + close(read_fd); goto out; }
diff --git a/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c b/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c index 17c47ed63531..bd9c43a5a06b 100644 --- a/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c +++ b/tools/testing/selftests/bpf/progs/file_read_pattern_prog.c @@ -43,12 +43,12 @@ struct file_rd_hist { __u32 tot_nr; };
-struct bpf_map_def SEC("maps") htab = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(long), - .value_size = sizeof(struct file_rd_hist), - .max_entries = 10000, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, struct file_rd_hist); + __uint(max_entries, 10000); +} htab SEC(".maps");
static bool is_expected_file(void *name) { @@ -135,4 +135,3 @@ int fs_file_release(struct fs_file_release_args *args) }
char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1;
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3692 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/S...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3692 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/S...