euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7RO5Q Reference: https://gitee.com/openeuler/kernel/commit/8a655676e63680d2c02acf479f4cfaa923...
-------------------------------------------------
reason:This patch is used to add memig swap feature to openEuler system. memig_swap.ko is used to transfer the address passed in the user state for page migration
Signed-off-by: yanxiaodan yanxiaodan@huawei.com Signed-off-by: linmiaohe linmiaohe@huawei.com Signed-off-by: louhongxiang louhongxiang@huawei.com Signed-off-by: liubo liubo254@huawei.com Signed-off-by: geruijun geruijun@huawei.com Signed-off-by: liangchenshu liangchenshu@huawei.com Reviewed-by: Jing Xiangfeng jingxiangfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + fs/proc/Makefile | 1 + fs/proc/base.c | 2 + fs/proc/internal.h | 1 + fs/proc/memig_swap.c | 102 +++++++++++++++++++++++++ fs/proc/task_mmu.c | 51 +++++++++++++ include/linux/swap.h | 4 +- lib/Kconfig | 5 ++ mm/vmscan.c | 57 ++++++++++++++ 10 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 fs/proc/memig_swap.c
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index fc6af3dbf1c8..d526512c1473 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -7811,6 +7811,7 @@ CONFIG_ARCH_HAS_KCOV=y # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_ARCH_USE_MEMTEST=y CONFIG_MEMIG_SCAN_MODULE=m +CONFIG_MEMIG_SWAP_MODULE=m # CONFIG_MEMTEST is not set # end of Kernel Testing and Coverage
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 6fb2f9270251..35190a4cee4f 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -9047,6 +9047,7 @@ CONFIG_ARCH_HAS_KCOV=y # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_ARCH_USE_MEMTEST=y CONFIG_MEMIG_SCAN_MODULE=m +CONFIG_MEMIG_SWAP_MODULE=m # CONFIG_MEMTEST is not set # CONFIG_HYPERV_TESTING is not set # end of Kernel Testing and Coverage diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 50c6de6f4979..e6747114a75b 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -35,3 +35,4 @@ proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o obj-$(CONFIG_MEMIG_SCAN_MODULE) += memig_scan.o +obj-$(CONFIG_MEMIG_SWAP_MODULE) += memig_swap.o diff --git a/fs/proc/base.c b/fs/proc/base.c index ac108995d68b..c5fbe0815614 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3354,6 +3354,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations), + REG("swap_pages", S_IWUSR, proc_mm_swap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), @@ -3703,6 +3704,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations), + REG("swap_pages", S_IWUSR, proc_mm_swap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 09867fbcbafc..0e6bf977ba23 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -306,6 +306,7 @@ extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_mm_idle_operations; +extern const struct file_operations proc_mm_swap_operations;
extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, diff --git a/fs/proc/memig_swap.c b/fs/proc/memig_swap.c new file mode 100644 index 000000000000..b24c706c3b2a --- /dev/null +++ b/fs/proc/memig_swap.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/proc_fs.h> +#include <linux/sched/mm.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/mempolicy.h> +#include <linux/uaccess.h> +#include <linux/delay.h> + +static ssize_t swap_pages_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char *p, *data, *data_ptr_res; + unsigned long vaddr; + struct mm_struct *mm = file->private_data; + struct page *page; + LIST_HEAD(pagelist); + int ret = 0; + + if (!mm || !mmget_not_zero(mm)) { + ret = -ESRCH; + goto out; + } + + if (count < 0) { + ret = -EOPNOTSUPP; + goto out_mm; + } + + data = memdup_user_nul(buf, count); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + goto out_mm; + } + + data_ptr_res = data; + while ((p = strsep(&data, "\n")) != NULL) { + if (!*p) + continue; + + ret = kstrtoul(p, 16, &vaddr); + if (ret != 0) + continue; + /*If get page struct failed, ignore it, get next page*/ + page = get_page_from_vaddr(mm, vaddr); + if (!page) + continue; + + add_page_for_swap(page, &pagelist); + } + + if (!list_empty(&pagelist)) + reclaim_pages(&pagelist); + + ret = count; + kfree(data_ptr_res); +out_mm: + mmput(mm); +out: + return ret; +} + +static int swap_pages_open(struct inode *inode, struct file *file) +{ + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + + return 0; +} + +static int swap_pages_release(struct inode *inode, struct file *file) +{ + module_put(THIS_MODULE); + return 0; +} + + +extern struct file_operations proc_swap_pages_operations; + +static int swap_pages_entry(void) +{ + proc_swap_pages_operations.owner = THIS_MODULE; + proc_swap_pages_operations.write = swap_pages_write; + proc_swap_pages_operations.open = swap_pages_open; + proc_swap_pages_operations.release = swap_pages_release; + + return 0; +} + +static void swap_pages_exit(void) +{ + memset(&proc_swap_pages_operations, 0, + sizeof(proc_swap_pages_operations)); +} + +MODULE_LICENSE("GPL"); +module_init(swap_pages_entry); +module_exit(swap_pages_exit); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8fdd14fb8ff9..f771df409978 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1835,7 +1835,58 @@ const struct file_operations proc_mm_idle_operations = { .release = mm_idle_release, };
+/*swap pages*/ +struct file_operations proc_swap_pages_operations = { +}; +EXPORT_SYMBOL_GPL(proc_swap_pages_operations); + +static ssize_t mm_swap_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (proc_swap_pages_operations.write) + return proc_swap_pages_operations.write(file, buf, count, ppos); + + return -1; +} + +static int mm_swap_open(struct inode *inode, struct file *file) +{ + struct mm_struct *mm = NULL; + + if (!file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + mm = proc_mem_open(inode, PTRACE_MODE_READ); + if (IS_ERR(mm)) + return PTR_ERR(mm); + + file->private_data = mm; + + if (proc_swap_pages_operations.open) + return proc_swap_pages_operations.open(inode, file); + + return 0; +} + +static int mm_swap_release(struct inode *inode, struct file *file) +{ + struct mm_struct *mm = file->private_data;
+ if (mm) + mmdrop(mm); + + if (proc_swap_pages_operations.release) + return proc_swap_pages_operations.release(inode, file); + + return 0; +} + +const struct file_operations proc_mm_swap_operations = { + .llseek = mem_lseek, + .write = mm_swap_write, + .open = mm_swap_open, + .release = mm_swap_release, +}; #endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA diff --git a/include/linux/swap.h b/include/linux/swap.h index 3c69cb653cb9..96666ca67a15 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -441,7 +441,9 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); - +extern int add_page_for_swap(struct page *page, struct list_head *pagelist); +extern struct page *get_page_from_vaddr(struct mm_struct *mm, + unsigned long vaddr); #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; diff --git a/lib/Kconfig b/lib/Kconfig index d6ab862f7040..b341e880adf3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -734,6 +734,11 @@ config MEMIG_SCAN_MODULE memig page scan feature used to scan the virtual address of the target process
+config MEMIG_SWAP_MODULE + tristate "module: memig page swap for memig support" + help + memig page swap feature + endmenu
config GENERIC_IOREMAP diff --git a/mm/vmscan.c b/mm/vmscan.c index 5bf98d0a22c9..9287be69e468 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -33,6 +33,7 @@ #include <linux/topology.h> #include <linux/cpu.h> #include <linux/cpuset.h> +#include <linux/mempolicy.h> #include <linux/compaction.h> #include <linux/notifier.h> #include <linux/rwsem.h> @@ -2814,6 +2815,7 @@ unsigned long reclaim_pages(struct list_head *folio_list)
return nr_reclaimed; } +EXPORT_SYMBOL_GPL(reclaim_pages);
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc) @@ -8105,3 +8107,58 @@ void check_move_unevictable_folios(struct folio_batch *fbatch) } } EXPORT_SYMBOL_GPL(check_move_unevictable_folios); + +int add_page_for_swap(struct page *page, struct list_head *pagelist) +{ + int err = -EBUSY; + struct page *head; + + /*If the page is mapped by more than one process, do not swap it */ + if (page_mapcount(page) > 1) + return -EACCES; + + if (PageHuge(page)) + return -EACCES; + + head = compound_head(page); + err = isolate_lru_page(head); + if (err) { + put_page(page); + return err; + } + put_page(page); + if (PageUnevictable(page)) + putback_lru_page(page); + else + list_add_tail(&head->lru, pagelist); + + err = 0; + return err; +} +EXPORT_SYMBOL_GPL(add_page_for_swap); + +struct page *get_page_from_vaddr(struct mm_struct *mm, unsigned long vaddr) +{ + struct page *page; + struct vm_area_struct *vma; + unsigned int follflags; + + down_read(&mm->mmap_lock); + + vma = find_vma(mm, vaddr); + if (!vma || vaddr < vma->vm_start || !vma_migratable(vma)) { + up_read(&mm->mmap_lock); + return NULL; + } + + follflags = FOLL_GET | FOLL_DUMP; + page = follow_page(vma, vaddr, follflags); + if (IS_ERR(page) || !page) { + up_read(&mm->mmap_lock); + return NULL; + } + + up_read(&mm->mmap_lock); + return page; +} +EXPORT_SYMBOL_GPL(get_page_from_vaddr);