[OLK-6.6 0/7] xcall2.0: bugfix for prefetch ko
Xinyu Zheng (7): xcall2.0: fix value name typos in __do_sys_epoll_pwait xcall2.0: prefetch: keep prefetch module name same with file name xcall2.0: prefetch: fix memory leak when release prefetch item through mmu notifier xcall2.0: add a per xcall_area mmu notifier for kernel module to do release callback xcall2.0: prefetch: set MAX support prefetch fd number to 1024 xcall2.0: prefetch: epoll_ctl no need to occupy a file refcount xcall2.0: add xcall_subdir_create and xcall_proc_create arch/arm64/include/asm/xcall.h | 2 + arch/arm64/kernel/xcall/core.c | 3 + arch/arm64/kernel/xcall/proc.c | 17 ++++- drivers/staging/xcall/prefetch.c | 109 +++++++++++++++++++------------ include/linux/xcall.h | 4 ++ 5 files changed, 90 insertions(+), 45 deletions(-) -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- fix value name typos Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 923ca2a23c98..649ababa1aa0 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -400,7 +400,7 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) void __user *buf = (void *)regs->regs[1]; struct prefetch_item *pfi = NULL; struct epoll_event events[MAX_FD] = {0}; - int i, fd, cpu, prefech_task_num; + int i, fd, cpu, prefetch_task_num; long ret; ret = default_sys_call_table()[__NR_epoll_pwait](regs); @@ -410,11 +410,11 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) if (!current_prefetch_items()) return ret; - prefech_task_num = ret > MAX_FD ? MAX_FD : ret; - if (copy_from_user(events, buf, prefech_task_num * sizeof(struct epoll_event))) + prefetch_task_num = ret > MAX_FD ? MAX_FD : ret; + if (copy_from_user(events, buf, prefetch_task_num * sizeof(struct epoll_event))) return ret; - for (i = 0; i < prefech_task_num; i++) { + for (i = 0; i < prefetch_task_num; i++) { fd = events[i].data; if (!(events[i].events & EPOLLIN) || fd >= MAX_FD) continue; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Keep the module name the same as the kernel module filename. This makes it easier for users to register xcall to this module through /proc/xcall/comm; otherwise they would have to double-check the module name in the source file. Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 649ababa1aa0..61d0ab85fd30 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -474,7 +474,7 @@ static long __do_sys_read(struct pt_regs *regs) /* MANDATORY */ struct xcall_prog xcall_prefetch_prog = { - .name = "xcall_prefetch", + .name = "prefetch", .owner = THIS_MODULE, .objs = { { -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Free the prefetch item data buffer pages before freeing the xcall_area syscall private data. Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 61d0ab85fd30..5486684bea95 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -200,17 +200,27 @@ static int get_async_prefetch_cpu(struct prefetch_item *pfi) return pfi->cpu; } -static void xcall_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) +static void prefetch_pfi_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct xcall_area *area = mm_xcall_area(mm); - void *area_private_data = NULL; + struct prefetch_item *prefetch_items = NULL; + int i; + + prefetch_items = xchg(&area->sys_call_data[__NR_epoll_pwait], NULL); + if (!prefetch_items) + return; - area_private_data = xchg(&area->sys_call_data[__NR_epoll_pwait], NULL); - kfree(area_private_data); + for (i = 0; i < MAX_FD; i++) { + cancel_work_sync(&prefetch_items[i].work); + if (prefetch_items[i].cache_pages) + __free_pages(prefetch_items[i].cache_pages, XCALL_CACHE_PAGE_ORDER); + prefetch_items[i].cache = NULL; + } + kfree(prefetch_items); } static struct mmu_notifier_ops xcall_mmu_notifier_ops = { - .release = xcall_mm_release, + .release = prefetch_pfi_release, }; static struct mmu_notifier xcall_mmu_notifier = { -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Add an xcall mmu notifier to xcall_area to give each hijacked process a window to release its own private data. The notifier callback should be registered in the kernel module; the notifier is unregistered in clear_xcall_area. Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- arch/arm64/include/asm/xcall.h | 2 ++ arch/arm64/kernel/xcall/core.c | 3 +++ drivers/staging/xcall/prefetch.c | 7 ++----- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h index c9143b7d2096..b6bcc120575a 100644 --- a/arch/arm64/include/asm/xcall.h +++ b/arch/arm64/include/asm/xcall.h @@ -4,6 +4,7 @@ #include <linux/jump_label.h> #include <linux/mm_types.h> +#include <linux/mmu_notifier.h> #include <linux/sched.h> #include <linux/xcall.h> #include <linux/refcount.h> @@ -48,6 +49,7 @@ struct xcall_area { refcount_t ref; struct xcall *xcall; void *sys_call_data[NR_syscalls]; + struct mmu_notifier xcall_mmu_notifier; }; extern const syscall_fn_t *default_sys_call_table(void); diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c index a88c4ed6e575..8a3e984f2f6b 100644 --- a/arch/arm64/kernel/xcall/core.c +++ b/arch/arm64/kernel/xcall/core.c @@ -277,6 +277,9 @@ void clear_xcall_area(struct mm_struct *mm) if (!refcount_dec_and_test(&area->ref)) return; + if (area->xcall_mmu_notifier.mm == mm && atomic_read(&mm->mm_count) > 1) + mmu_notifier_unregister(&area->xcall_mmu_notifier, mm); + if (area->xcall) put_xcall(area->xcall); diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 5486684bea95..4e372767f5b3 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -223,10 +223,6 @@ static struct mmu_notifier_ops xcall_mmu_notifier_ops = { .release = prefetch_pfi_release, }; -static 
struct mmu_notifier xcall_mmu_notifier = { - .ops = &xcall_mmu_notifier_ops, -}; - static void xcall_cancel_work(unsigned int fd) { struct prefetch_item *pfi = current_prefetch_items() + fd; @@ -364,7 +360,8 @@ static long __do_sys_epoll_create(struct pt_regs *regs) items[i].file = NULL; set_prefetch_numa_cpu(&items[i]); } - mmu_notifier_register(&xcall_mmu_notifier, current->mm); + area->xcall_mmu_notifier.ops = &xcall_mmu_notifier_ops; + mmu_notifier_register(&area->xcall_mmu_notifier, current->mm); return ret; } -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- In the prefetch ko, increase the maximum number of supported prefetch fds to 1024. We therefore use xcall_area's epoll_ctl private data slot to store an epoll_event array, which the epoll_pwait handler uses to copy the event list from user space. Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 37 ++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 4e372767f5b3..90ebd6b0f629 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -17,11 +17,15 @@ #include <asm/xcall.h> -#define MAX_FD 100 +#define MAX_FD 1024 #define XCALL_CACHE_PAGE_ORDER 2 #define XCALL_CACHE_BUF_SIZE ((1 << XCALL_CACHE_PAGE_ORDER) * PAGE_SIZE) +#define current_epoll_events() \ + ((struct epoll_event *) \ + ((((struct xcall_area *)(current->mm->xcall))->sys_call_data)[__NR_epoll_ctl])) + #define current_prefetch_items() \ ((struct prefetch_item *) \ ((((struct xcall_area *)(current->mm->xcall))->sys_call_data)[__NR_epoll_pwait])) @@ -204,8 +208,13 @@ static void prefetch_pfi_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct xcall_area *area = mm_xcall_area(mm); struct prefetch_item *prefetch_items = NULL; + struct epoll_event *events = NULL; int i; + events = xchg(&area->sys_call_data[__NR_epoll_ctl], NULL); + if (events) + kfree(events); + prefetch_items = xchg(&area->sys_call_data[__NR_epoll_pwait], NULL); if (!prefetch_items) return; @@ -327,22 +336,25 @@ static long __do_sys_epoll_create(struct pt_regs *regs) int i; struct xcall_area *area = mm_xcall_area(current->mm); struct prefetch_item *items = NULL; + struct epoll_events *events = NULL; ret = default_sys_call_table()[__NR_epoll_create1](regs); if (ret < 0) return ret; - if (current_prefetch_items()) return ret; + 
events = kcalloc(MAX_FD, sizeof(struct epoll_event), GFP_KERNEL); + if (!events) + return ret; + if (cmpxchg(&area->sys_call_data[__NR_epoll_ctl], NULL, events)) + goto free_events; + items = kcalloc(MAX_FD, sizeof(struct prefetch_item), GFP_KERNEL); if (!items) - return -ENOMEM; - - if (cmpxchg(&area->sys_call_data[__NR_epoll_pwait], NULL, items)) { - kfree(items); - return ret; - } + goto free_events; + if (cmpxchg(&area->sys_call_data[__NR_epoll_pwait], NULL, items)) + goto free_items; for (i = 0; i < MAX_FD; i++) { items[i].cache_pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, @@ -360,9 +372,16 @@ static long __do_sys_epoll_create(struct pt_regs *regs) items[i].file = NULL; set_prefetch_numa_cpu(&items[i]); } + area->xcall_mmu_notifier.ops = &xcall_mmu_notifier_ops; mmu_notifier_register(&area->xcall_mmu_notifier, current->mm); return ret; + +free_items: + kfree(items); +free_events: + kfree(events); + return ret; } static long __do_sys_epoll_ctl(struct pt_regs *regs) @@ -406,7 +425,7 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) { void __user *buf = (void *)regs->regs[1]; struct prefetch_item *pfi = NULL; - struct epoll_event events[MAX_FD] = {0}; + struct epoll_event *events = current_epoll_events(); int i, fd, cpu, prefetch_task_num; long ret; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- My test repeatedly restarts the redis server and kills it with the kill command. On the second start, redis fails to run and reports that port 6379 is occupied, which means the refcount for port 6379 was never decremented to 0. epoll_ctl() does fget() on the file, and the matching fput() is done in close(). But if the process is killed, the prefetch module never gets a chance to run close(), so port 6379 is not released. Since we only need the struct file address, we do not actually need to hold the refcount: do both fget() and fput() in epoll_ctl(). Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 90ebd6b0f629..f444a78d2a66 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -406,12 +406,10 @@ static long __do_sys_epoll_ctl(struct pt_regs *regs) if (!file) return ret; - if (!sock_from_file(file)) { - fput(file); - return ret; - } - if (cmpxchg(&pfi->file, NULL, file)) - fput(file); + if (sock_from_file(file)) + cmpxchg(&pfi->file, NULL, file); + + fput(file); break; case EPOLL_CTL_DEL: xcall_cancel_work(fd); @@ -472,7 +470,6 @@ static long __do_sys_close(struct pt_regs *regs) pfi_old_file = pfi->file; pfi_new_file = cmpxchg(&pfi->file, pfi_old_file, NULL); if (pfi_new_file == pfi_old_file) { - fput(pfi_old_file); atomic_set(&pfi->state, XCALL_CACHE_NONE); pfi->len = 0; pfi->pos = 0; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Export two functions to kernel modules so that each module can create its private procfs entries under /proc/xcall, which is the root path for xcall. Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- arch/arm64/kernel/xcall/proc.c | 17 +++++++++++++++-- drivers/staging/xcall/prefetch.c | 24 ++++++++++++------------ include/linux/xcall.h | 4 ++++ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/xcall/proc.c b/arch/arm64/kernel/xcall/proc.c index 12032120c7d6..0e3b714ac730 100644 --- a/arch/arm64/kernel/xcall/proc.c +++ b/arch/arm64/kernel/xcall/proc.c @@ -16,6 +16,8 @@ static LIST_HEAD(comm_list); static DECLARE_RWSEM(comm_rwsem); +struct proc_dir_entry *root_xcall_dir; + static void free_xcall_comm(struct xcall_comm *info) { if (!info) @@ -213,6 +215,19 @@ static ssize_t xcall_comm_write(struct file *file, return ret ? 
ret : nbytes; } +struct proc_dir_entry *xcall_subdir_create(const char *name) +{ + return proc_mkdir(name, root_xcall_dir); +} +EXPORT_SYMBOL(xcall_subdir_create); + +struct proc_dir_entry *xcall_proc_create(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct proc_ops *proc_ops) +{ + return proc_create(name, mode, parent, proc_ops); +} +EXPORT_SYMBOL(xcall_proc_create); + static const struct proc_ops xcall_comm_ops = { .proc_open = xcall_comm_open, .proc_read = seq_read, @@ -223,8 +238,6 @@ static const struct proc_ops xcall_comm_ops = { static int __init xcall_proc_init(void) { - struct proc_dir_entry *root_xcall_dir; - if (!static_key_enabled(&xcall_enable)) return 0; diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index f444a78d2a66..572bc4c90690 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, xcall_cache_miss); static struct workqueue_struct *rc_work; static struct cpumask xcall_mask; -struct proc_dir_entry *xcall_proc_dir, *prefetch_dir, *xcall_mask_dir; +struct proc_dir_entry *prefetch_proc_dir, *prefetch_dir, *xcall_mask_dir; enum cache_state { XCALL_CACHE_NONE = 0, @@ -526,14 +526,14 @@ struct xcall_prog xcall_prefetch_prog = { static int __init init_xcall_prefetch_procfs(void) { - xcall_proc_dir = proc_mkdir("xcall_feature", NULL); - if (!xcall_proc_dir) + prefetch_proc_dir = xcall_subdir_create("prefetch"); + if (!prefetch_proc_dir) return -ENOMEM; - prefetch_dir = proc_create("prefetch", 0640, xcall_proc_dir, - &xcall_prefetch_fops); + prefetch_dir = xcall_proc_create("prefetch", 0640, prefetch_proc_dir, + &xcall_prefetch_fops); if (!prefetch_dir) - goto rm_xcall_proc_dir; - xcall_mask_dir = proc_create("cpu_list", 0640, xcall_proc_dir, + goto rm_prefetch_proc_dir; + xcall_mask_dir = proc_create("cpu_list", 0640, prefetch_proc_dir, &xcall_mask_fops); if (!xcall_mask_dir) goto rm_prefetch_dir; @@ 
-543,8 +543,8 @@ static int __init init_xcall_prefetch_procfs(void) rm_prefetch_dir: proc_remove(prefetch_dir); -rm_xcall_proc_dir: - proc_remove(xcall_proc_dir); +rm_prefetch_proc_dir: + proc_remove(prefetch_proc_dir); return -ENOMEM; } @@ -572,7 +572,7 @@ static int __init xcall_prefetch_init(void) remove_dir: proc_remove(prefetch_dir); proc_remove(xcall_mask_dir); - proc_remove(xcall_proc_dir); + proc_remove(prefetch_proc_dir); destroy_queue: destroy_workqueue(rc_work); return ret; @@ -587,8 +587,8 @@ static void __exit xcall_prefetch_exit(void) proc_remove(prefetch_dir); if (xcall_mask_dir) proc_remove(xcall_mask_dir); - if (xcall_proc_dir) - proc_remove(xcall_proc_dir); + if (prefetch_proc_dir) + proc_remove(prefetch_proc_dir); xcall_prog_unregister(&xcall_prefetch_prog); } diff --git a/include/linux/xcall.h b/include/linux/xcall.h index 510aebe4e7c0..215542097dfa 100644 --- a/include/linux/xcall.h +++ b/include/linux/xcall.h @@ -7,6 +7,7 @@ #define _LINUX_XCALL_H #include <linux/module.h> +#include <linux/proc_fs.h> struct vm_area_struct; struct mm_struct; @@ -32,6 +33,9 @@ struct xcall_prog { extern int xcall_prog_register(struct xcall_prog *prog); extern void xcall_prog_unregister(struct xcall_prog *prog); extern void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p); +struct proc_dir_entry *xcall_subdir_create(const char *name); +struct proc_dir_entry *xcall_proc_create(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct proc_ops *proc_ops); extern void clear_xcall_area(struct mm_struct *mm); extern int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm); #else /* !CONFIG_DYNAMIC_XCALL */ -- 2.34.1
participants (1)
-
Xinyu Zheng