[PATCH OLK-6.6 v2 0/6] xcall2.0: bugfix for prefetch ko
Xinyu Zheng (6): xcall2.0: prefetch: fix value name typos in __do_sys_epoll_pwait xcall2.0: prefetch: keep prefetch module name same with file name xcall2.0: prefetch: fix memory leak when release prefetch item through mmu notifier xcall2.0: add xcall_subdir_create help to create subdir below /proc/xcall xcall2.0: prefetch: epoll_ctl no need to occupy a file refcount xcall2.0: prefetch: introduce struct prefetch_mm_data arch/arm64/kernel/xcall/proc.c | 10 +- drivers/staging/xcall/prefetch.c | 163 ++++++++++++++++++++----------- include/linux/xcall.h | 1 + 3 files changed, 114 insertions(+), 60 deletions(-) -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Fix variable name typos in __do_sys_epoll_pwait(): rename prefech_task_num to prefetch_task_num. Fixes: ccb8cd0c6313 ("xcall2.0: Introduce xcall epollwait prefetch feature") Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 923ca2a23c98..649ababa1aa0 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -400,7 +400,7 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) void __user *buf = (void *)regs->regs[1]; struct prefetch_item *pfi = NULL; struct epoll_event events[MAX_FD] = {0}; - int i, fd, cpu, prefech_task_num; + int i, fd, cpu, prefetch_task_num; long ret; ret = default_sys_call_table()[__NR_epoll_pwait](regs); @@ -410,11 +410,11 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) if (!current_prefetch_items()) return ret; - prefech_task_num = ret > MAX_FD ? MAX_FD : ret; - if (copy_from_user(events, buf, prefech_task_num * sizeof(struct epoll_event))) + prefetch_task_num = ret > MAX_FD ? MAX_FD : ret; + if (copy_from_user(events, buf, prefetch_task_num * sizeof(struct epoll_event))) return ret; - for (i = 0; i < prefech_task_num; i++) { + for (i = 0; i < prefetch_task_num; i++) { fd = events[i].data; if (!(events[i].events & EPOLLIN) || fd >= MAX_FD) continue; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Keep the module name the same as the kernel module file name. This makes it easier for users to register an xcall to this module through /proc/xcall/comm; otherwise they would have to double-check the module name in the source file. Fixes: ccb8cd0c6313 ("xcall2.0: Introduce xcall epollwait prefetch feature") Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index 649ababa1aa0..f096c314cf4d 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -474,7 +474,7 @@ static long __do_sys_read(struct pt_regs *regs) /* MANDATORY */ struct xcall_prog xcall_prefetch_prog = { - .name = "xcall_prefetch", + .name = "prefetch", .owner = THIS_MODULE, .objs = { { @@ -572,6 +572,4 @@ static void __exit xcall_prefetch_exit(void) module_init(xcall_prefetch_init); module_exit(xcall_prefetch_exit); -MODULE_AUTHOR(""); -MODULE_DESCRIPTION("Xcall Prefetch"); MODULE_LICENSE("GPL"); -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Free each prefetch item's data buffer pages before freeing the xcall_area syscall private data. Fixes: ccb8cd0c6313 ("xcall2.0: Introduce xcall epollwait prefetch feature") Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index f096c314cf4d..fdc27543d95d 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -200,17 +200,27 @@ static int get_async_prefetch_cpu(struct prefetch_item *pfi) return pfi->cpu; } -static void xcall_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) +static void prefetch_pfi_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct xcall_area *area = mm_xcall_area(mm); - void *area_private_data = NULL; + struct prefetch_item *prefetch_items = NULL; + int i; + + prefetch_items = xchg(&area->sys_call_data[__NR_epoll_pwait], NULL); + if (!prefetch_items) + return; - area_private_data = xchg(&area->sys_call_data[__NR_epoll_pwait], NULL); - kfree(area_private_data); + for (i = 0; i < MAX_FD; i++) { + cancel_work_sync(&prefetch_items[i].work); + if (prefetch_items[i].cache_pages) + __free_pages(prefetch_items[i].cache_pages, XCALL_CACHE_PAGE_ORDER); + prefetch_items[i].cache = NULL; + } + kfree(prefetch_items); } static struct mmu_notifier_ops xcall_mmu_notifier_ops = { - .release = xcall_mm_release, + .release = prefetch_pfi_release, }; static struct mmu_notifier xcall_mmu_notifier = { -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Export the xcall_subdir_create() function to kernel modules so that each module can create its own private procfs directory below /proc/xcall, which is the root path for xcall. Fixes: ccb8cd0c6313 ("xcall2.0: Introduce xcall epollwait prefetch feature") Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- arch/arm64/kernel/xcall/proc.c | 10 ++++++++-- drivers/staging/xcall/prefetch.c | 2 +- include/linux/xcall.h | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/xcall/proc.c b/arch/arm64/kernel/xcall/proc.c index 12032120c7d6..54ca0d53908f 100644 --- a/arch/arm64/kernel/xcall/proc.c +++ b/arch/arm64/kernel/xcall/proc.c @@ -16,6 +16,8 @@ static LIST_HEAD(comm_list); static DECLARE_RWSEM(comm_rwsem); +struct proc_dir_entry *root_xcall_dir; + static void free_xcall_comm(struct xcall_comm *info) { if (!info) @@ -213,6 +215,12 @@ static ssize_t xcall_comm_write(struct file *file, return ret ? 
ret : nbytes; } +struct proc_dir_entry *xcall_subdir_create(const char *name) +{ + return proc_mkdir(name, root_xcall_dir); +} +EXPORT_SYMBOL(xcall_subdir_create); + static const struct proc_ops xcall_comm_ops = { .proc_open = xcall_comm_open, .proc_read = seq_read, @@ -223,8 +231,6 @@ static const struct proc_ops xcall_comm_ops = { static int __init xcall_proc_init(void) { - struct proc_dir_entry *root_xcall_dir; - if (!static_key_enabled(&xcall_enable)) return 0; diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index fdc27543d95d..e96a4b808ea8 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -513,7 +513,7 @@ struct xcall_prog xcall_prefetch_prog = { static int __init init_xcall_prefetch_procfs(void) { - xcall_proc_dir = proc_mkdir("xcall_feature", NULL); + xcall_proc_dir = xcall_subdir_create("prefetch"); if (!xcall_proc_dir) return -ENOMEM; prefetch_dir = proc_create("prefetch", 0640, xcall_proc_dir, diff --git a/include/linux/xcall.h b/include/linux/xcall.h index 510aebe4e7c0..26c60cd3dd38 100644 --- a/include/linux/xcall.h +++ b/include/linux/xcall.h @@ -32,6 +32,7 @@ struct xcall_prog { extern int xcall_prog_register(struct xcall_prog *prog); extern void xcall_prog_unregister(struct xcall_prog *prog); extern void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p); +extern struct proc_dir_entry *xcall_subdir_create(const char *name); extern void clear_xcall_area(struct mm_struct *mm); extern int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm); #else /* !CONFIG_DYNAMIC_XCALL */ -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- My test repeatedly starts the redis server and then kills it with the kill command. I found that the second time redis is started, it cannot run and reports that port 6379 is occupied. This means the refcount of the file behind port 6379 has not dropped to 0. epoll_ctl() does fget() on the file, and the matching fput() is done in close(). But if the process is killed, the prefetch module has no chance to run close(), so port 6379 is not released. Since we only need the struct file address, we do not actually need to hold a reference. Just do both fget() and fput() in epoll_ctl(). Fixes: ccb8cd0c6313 ("xcall2.0: Introduce xcall epollwait prefetch feature") Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index e96a4b808ea8..c49f44edaf38 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -389,13 +389,11 @@ static long __do_sys_epoll_ctl(struct pt_regs *regs) file = fget(fd); if (!file) return ret; + if (sock_from_file(file)) + cmpxchg(&pfi->file, NULL, file); + fput(file); + break; - if (!sock_from_file(file)) { - fput(file); - return ret; - } - if (cmpxchg(&pfi->file, NULL, file)) - fput(file); break; case EPOLL_CTL_DEL: xcall_cancel_work(fd); @@ -456,7 +454,6 @@ static long __do_sys_close(struct pt_regs *regs) pfi_old_file = pfi->file; pfi_new_file = cmpxchg(&pfi->file, pfi_old_file, NULL); if (pfi_new_file == pfi_old_file) { - fput(pfi_old_file); atomic_set(&pfi->state, XCALL_CACHE_NONE); pfi->len = 0; pfi->pos = 0; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Introduce struct prefetch_mm_data to store per-process data. There are three important members in this struct. prefetch_mm_data.items is the prefetch item array; a file descriptor value is used as the index of its prefetch item. prefetch_mm_data.events serves epoll_pwait(); this buffer is used to copy the ready fd list from user space. prefetch_mm_data.mmu_notifier addresses a leak seen in my test: when redis is killed or exits abnormally, memory is leaked because there is no chance to free the buffer pages in prefetch_mm_data.items. To give each process a chance to free its resources, this mmu_notifier is used to register a release callback for the process's mm. Fixes: ccb8cd0c6313 ("xcall2.0: Introduce xcall epollwait prefetch feature") Signed-off-by: Xinyu Zheng <zhengxinyu6@huawei.com> --- drivers/staging/xcall/prefetch.c | 130 ++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 44 deletions(-) diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c index c49f44edaf38..c3fbc6d2ef4f 100644 --- a/drivers/staging/xcall/prefetch.c +++ b/drivers/staging/xcall/prefetch.c @@ -17,13 +17,13 @@ #include <asm/xcall.h> -#define MAX_FD 100 +#define MAX_FD 1024 #define XCALL_CACHE_PAGE_ORDER 2 #define XCALL_CACHE_BUF_SIZE ((1 << XCALL_CACHE_PAGE_ORDER) * PAGE_SIZE) -#define current_prefetch_items() \ - ((struct prefetch_item *) \ +#define current_prefetch_mm_data() \ + ((struct prefetch_mm_data *) \ ((((struct xcall_area *)(current->mm->xcall))->sys_call_data)[__NR_epoll_pwait])) static DEFINE_PER_CPU_ALIGNED(unsigned long, xcall_cache_hit); @@ -33,6 +33,9 @@ static struct workqueue_struct *rc_work; static struct cpumask xcall_mask; struct proc_dir_entry *xcall_proc_dir, *prefetch_dir, *xcall_mask_dir; +static struct list_head prefetch_mm_data_to_delete; +static spinlock_t prefetch_mm_delete_lock; + enum cache_state { 
XCALL_CACHE_NONE = 0, XCALL_CACHE_PREFETCH, @@ -53,6 +56,13 @@ struct prefetch_item { loff_t pos; }; +struct prefetch_mm_data { + struct prefetch_item items[MAX_FD]; + struct epoll_event events[MAX_FD]; + struct mmu_notifier mmu_notifier; + struct list_head list; +}; + static ssize_t xcall_mask_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -140,6 +150,17 @@ static const struct proc_ops xcall_prefetch_fops = { .proc_release = single_release }; +static inline struct prefetch_item *get_pfi(unsigned int fd) +{ + struct prefetch_item *pfis = NULL; + + if (fd >= MAX_FD || !current_prefetch_mm_data()) + return NULL; + + pfis = (struct prefetch_item *)current_prefetch_mm_data(); + return pfis + fd; +} + static inline bool transition_state(struct prefetch_item *pfi, enum cache_state old, enum cache_state new) { @@ -202,36 +223,53 @@ static int get_async_prefetch_cpu(struct prefetch_item *pfi) static void prefetch_pfi_release(struct mmu_notifier *mn, struct mm_struct *mm) { + struct prefetch_mm_data *private_data = + container_of(mn, struct prefetch_mm_data, mmu_notifier); struct xcall_area *area = mm_xcall_area(mm); struct prefetch_item *prefetch_items = NULL; int i; - prefetch_items = xchg(&area->sys_call_data[__NR_epoll_pwait], NULL); - if (!prefetch_items) - return; - + private_data = xchg(&area->sys_call_data[__NR_epoll_pwait], NULL); + prefetch_items = (struct prefetch_item *)private_data->items; for (i = 0; i < MAX_FD; i++) { cancel_work_sync(&prefetch_items[i].work); if (prefetch_items[i].cache_pages) __free_pages(prefetch_items[i].cache_pages, XCALL_CACHE_PAGE_ORDER); prefetch_items[i].cache = NULL; } - kfree(prefetch_items); + spin_lock(&prefetch_mm_delete_lock); + list_add_tail(&private_data->list, &prefetch_mm_data_to_delete); + spin_unlock(&prefetch_mm_delete_lock); +} + +static void xcall_mmu_notifier_free(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct prefetch_mm_data, mmu_notifier)); +} + +static void 
xcall_prefetch_mm_free(void) +{ + struct prefetch_mm_data *private_data, *tmp; + + spin_lock(&prefetch_mm_delete_lock); + list_for_each_entry_safe(private_data, tmp, &prefetch_mm_data_to_delete, list) { + list_del(&private_data->list); + mmu_notifier_put(&private_data->mmu_notifier); + } + spin_unlock(&prefetch_mm_delete_lock); } static struct mmu_notifier_ops xcall_mmu_notifier_ops = { .release = prefetch_pfi_release, -}; - -static struct mmu_notifier xcall_mmu_notifier = { - .ops = &xcall_mmu_notifier_ops, + .free_notifier = xcall_mmu_notifier_free, }; static void xcall_cancel_work(unsigned int fd) { - struct prefetch_item *pfi = current_prefetch_items() + fd; + struct prefetch_item *pfi = NULL; - if (fd < MAX_FD && pfi->file) + pfi = get_pfi(fd); + if (pfi && pfi->file) cancel_work_sync(&pfi->work); } @@ -301,11 +339,8 @@ static inline int xcall_read_begin(unsigned int fd, char __user *buf, size_t cou { struct prefetch_item *pfi = NULL; - if (fd >= MAX_FD || !current_prefetch_items()) - return -EAGAIN; - - pfi = current_prefetch_items() + fd; - if (!pfi->file) + pfi = get_pfi(fd); + if (!pfi || !pfi->file) return -EAGAIN; return xcall_read(pfi, buf, count); @@ -315,11 +350,8 @@ static inline void xcall_read_end(unsigned int fd) { struct prefetch_item *pfi = NULL; - if (fd >= MAX_FD || !current_prefetch_items()) - return; - - pfi = current_prefetch_items() + fd; - if (!pfi->file) + pfi = get_pfi(fd); + if (!pfi || !pfi->file) return; transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_NONE); @@ -330,24 +362,24 @@ static long __do_sys_epoll_create(struct pt_regs *regs) long ret; int i; struct xcall_area *area = mm_xcall_area(current->mm); + struct prefetch_mm_data *private_data = NULL; struct prefetch_item *items = NULL; ret = default_sys_call_table()[__NR_epoll_create1](regs); if (ret < 0) return ret; - - if (current_prefetch_items()) + if (current_prefetch_mm_data()) return ret; - items = kcalloc(MAX_FD, sizeof(struct prefetch_item), GFP_KERNEL); - if (!items) 
- return -ENOMEM; - - if (cmpxchg(&area->sys_call_data[__NR_epoll_pwait], NULL, items)) { - kfree(items); + private_data = kmalloc(sizeof(struct prefetch_mm_data), GFP_KERNEL); + if (!private_data) + return ret; + if (cmpxchg(&area->sys_call_data[__NR_epoll_pwait], NULL, private_data)) { + kfree(private_data); return ret; } + items = private_data->items; for (i = 0; i < MAX_FD; i++) { items[i].cache_pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, XCALL_CACHE_PAGE_ORDER); @@ -364,7 +396,12 @@ static long __do_sys_epoll_create(struct pt_regs *regs) items[i].file = NULL; set_prefetch_numa_cpu(&items[i]); } - mmu_notifier_register(&xcall_mmu_notifier, current->mm); + + memset(private_data->events, 0, sizeof(private_data->events)); + INIT_LIST_HEAD(&private_data->list); + private_data->mmu_notifier.ops = &xcall_mmu_notifier_ops; + mmu_notifier_register(&private_data->mmu_notifier, current->mm); + xcall_prefetch_mm_free(); return ret; } @@ -377,13 +414,13 @@ static long __do_sys_epoll_ctl(struct pt_regs *regs) long ret; ret = default_sys_call_table()[__NR_epoll_ctl](regs); - if (ret || fd >= MAX_FD) + if (ret) return ret; - if (!current_prefetch_items()) + pfi = get_pfi(fd); + if (!pfi) return ret; - pfi = current_prefetch_items() + fd; switch (op) { case EPOLL_CTL_ADD: file = fget(fd); @@ -393,8 +430,6 @@ static long __do_sys_epoll_ctl(struct pt_regs *regs) cmpxchg(&pfi->file, NULL, file); fput(file); break; - - break; case EPOLL_CTL_DEL: xcall_cancel_work(fd); break; @@ -407,7 +442,8 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) { void __user *buf = (void *)regs->regs[1]; struct prefetch_item *pfi = NULL; - struct epoll_event events[MAX_FD] = {0}; + struct prefetch_mm_data *private_data = NULL; + struct epoll_event *events = NULL; int i, fd, cpu, prefetch_task_num; long ret; @@ -415,9 +451,12 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) if (ret <= 0) return ret; - if (!current_prefetch_items()) + if (!current_prefetch_mm_data()) return 
ret; + private_data = current_prefetch_mm_data(); + events = private_data->events; + prefetch_task_num = ret > MAX_FD ? MAX_FD : ret; if (copy_from_user(events, buf, prefetch_task_num * sizeof(struct epoll_event))) return ret; @@ -427,7 +466,7 @@ static long __do_sys_epoll_pwait(struct pt_regs *regs) if (!(events[i].events & EPOLLIN) || fd >= MAX_FD) continue; - pfi = current_prefetch_items() + fd; + pfi = get_pfi(fd); if (!(pfi->file) || !(pfi->file->f_mode & FMODE_READ)) continue; if (atomic_read(&pfi->state) != XCALL_CACHE_NONE) @@ -446,11 +485,11 @@ static long __do_sys_close(struct pt_regs *regs) struct file *pfi_old_file = NULL; struct file *pfi_new_file = NULL; - if (!current_prefetch_items()) + pfi = get_pfi(fd); + if (!pfi) return default_sys_call_table()[__NR_close](regs); - pfi = current_prefetch_items() + fd; - if (fd < MAX_FD && pfi->file) { + if (pfi && pfi->file) { pfi_old_file = pfi->file; pfi_new_file = cmpxchg(&pfi->file, pfi_old_file, NULL); if (pfi_new_file == pfi_old_file) { @@ -551,6 +590,8 @@ static int __init xcall_prefetch_init(void) if (ret) goto remove_dir; + INIT_LIST_HEAD(&prefetch_mm_data_to_delete); + spin_lock_init(&prefetch_mm_delete_lock); return ret; remove_dir: @@ -575,6 +616,7 @@ static void __exit xcall_prefetch_exit(void) proc_remove(xcall_proc_dir); xcall_prog_unregister(&xcall_prefetch_prog); + mmu_notifier_synchronize(); } module_init(xcall_prefetch_init); -- 2.34.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/19163 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/XCX... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/19163 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/XCX...
participants (2)
-
patchwork bot -
Xinyu Zheng