From: liubo <liubo254@huawei.com>
euleros inclusion
category: feature
feature: etmem
bugzilla: 49889
-------------------------------------------------
To achieve the goal of memory expansion, cold pages need to be migrated to the swap partition; etmem_swap.ko serves this purpose.
This patch generates etmem_swap.ko, which takes the virtual addresses passed in from user space and migrates the corresponding pages out to swap.
Signed-off-by: yanxiaodan <yanxiaodan@huawei.com>
Signed-off-by: linmiaohe <linmiaohe@huawei.com>
Signed-off-by: louhongxiang <louhongxiang@huawei.com>
Signed-off-by: liubo <liubo254@huawei.com>
Signed-off-by: geruijun <geruijun@huawei.com>
Signed-off-by: liangchenshu <liangchenshu@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Reviewed-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 fs/proc/Makefile     |   1 +
 fs/proc/base.c       |   2 +
 fs/proc/etmem_swap.c | 102 +++++++++++++++++++++++++++++++++++++++
 fs/proc/internal.h   |   1 +
 fs/proc/task_mmu.c   |  51 ++++++++++++++++++++
 include/linux/swap.h |   5 ++
 lib/Kconfig          |   5 ++
 mm/vmscan.c          | 112 +++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 279 insertions(+)
 create mode 100644 fs/proc/etmem_swap.c
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index c1ebd017a83bc..b9c9f59aba456 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -34,3 +34,4 @@ proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PRINTK)	+= kmsg.o
 proc-$(CONFIG_PROC_PAGE_MONITOR)	+= page.o
 obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o
+obj-$(CONFIG_ETMEM_SWAP) += etmem_swap.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9ea434f9da4ee..c66f5ffadb58d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2990,6 +2990,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
 	REG("pagemap",    S_IRUSR, proc_pagemap_operations),
 	REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations),
+	REG("swap_pages", S_IWUSR, proc_mm_swap_operations),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -3379,6 +3380,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
 	REG("pagemap",    S_IRUSR, proc_pagemap_operations),
 	REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations),
+	REG("swap_pages", S_IWUSR, proc_mm_swap_operations),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/etmem_swap.c b/fs/proc/etmem_swap.c
new file mode 100644
index 0000000000000..b24c706c3b2a3
--- /dev/null
+++ b/fs/proc/etmem_swap.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+#include <linux/sched/mm.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/mempolicy.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+
+/*
+ * Parse newline-separated hex virtual addresses written to
+ * /proc/<pid>/swap_pages, isolate the pages they map to, and
+ * reclaim (swap out) the whole batch.
+ */
+static ssize_t swap_pages_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *p, *data, *data_ptr_res;
+	unsigned long vaddr;
+	struct mm_struct *mm = file->private_data;
+	struct page *page;
+	LIST_HEAD(pagelist);
+	int ret = 0;
+
+	if (!mm || !mmget_not_zero(mm)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* count is unsigned: a "< 0" test is dead code; bound it instead */
+	if (count == 0 || count > PAGE_SIZE) {
+		ret = -EOPNOTSUPP;
+		goto out_mm;
+	}
+
+	data = memdup_user_nul(buf, count);
+	if (IS_ERR(data)) {
+		ret = PTR_ERR(data);
+		goto out_mm;
+	}
+
+	data_ptr_res = data;
+	while ((p = strsep(&data, "\n")) != NULL) {
+		if (!*p)
+			continue;
+
+		ret = kstrtoul(p, 16, &vaddr);
+		if (ret != 0)
+			continue;
+		/* if getting the page struct fails, skip to the next address */
+		page = get_page_from_vaddr(mm, vaddr);
+		if (!page)
+			continue;
+
+		add_page_for_swap(page, &pagelist);
+	}
+
+	if (!list_empty(&pagelist))
+		reclaim_pages(&pagelist);
+
+	ret = count;
+	kfree(data_ptr_res);
+out_mm:
+	mmput(mm);
+out:
+	return ret;
+}
+
+static int swap_pages_open(struct inode *inode, struct file *file)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+
+	return 0;
+}
+
+static int swap_pages_release(struct inode *inode, struct file *file)
+{
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+extern struct file_operations proc_swap_pages_operations;
+
+/* Hook this module's handlers into the proc stub ops on load. */
+static int swap_pages_entry(void)
+{
+	proc_swap_pages_operations.owner = THIS_MODULE;
+	proc_swap_pages_operations.write = swap_pages_write;
+	proc_swap_pages_operations.open = swap_pages_open;
+	proc_swap_pages_operations.release = swap_pages_release;
+
+	return 0;
+}
+
+static void swap_pages_exit(void)
+{
+	memset(&proc_swap_pages_operations, 0,
+			sizeof(proc_swap_pages_operations));
+}
+
+MODULE_LICENSE("GPL");
+module_init(swap_pages_entry);
+module_exit(swap_pages_exit);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 55b4a9b716a99..2df432cc38af3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -300,6 +300,7 @@ extern const struct file_operations proc_pid_smaps_rollup_operations;
 extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_mm_idle_operations;
+extern const struct file_operations proc_mm_swap_operations;
extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee258adb631ba..ac7f57badcfdf 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1663,7 +1663,58 @@ const struct file_operations proc_mm_idle_operations = { .release = mm_idle_release, };
+/* swap pages: stub ops filled in by etmem_swap.ko at module load */
+struct file_operations proc_swap_pages_operations = {
+};
+EXPORT_SYMBOL_GPL(proc_swap_pages_operations);
+
+/* Delegate to the module's write handler; EPERM until it is loaded. */
+static ssize_t mm_swap_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	if (proc_swap_pages_operations.write)
+		return proc_swap_pages_operations.write(file, buf, count, ppos);
+
+	return -EPERM;
+}
+
+static int mm_swap_open(struct inode *inode, struct file *file)
+{
+	struct mm_struct *mm = NULL;
+
+	if (!file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mm = proc_mem_open(inode, PTRACE_MODE_READ);
+	if (IS_ERR(mm))
+		return PTR_ERR(mm);
+
+	file->private_data = mm;
+
+	if (proc_swap_pages_operations.open)
+		return proc_swap_pages_operations.open(inode, file);
+
+	return 0;
+}
+
+static int mm_swap_release(struct inode *inode, struct file *file)
+{
+	struct mm_struct *mm = file->private_data;
+
+	if (mm)
+		mmdrop(mm);
+
+	if (proc_swap_pages_operations.release)
+		return proc_swap_pages_operations.release(inode, file);
+
+	return 0;
+}
+
+const struct file_operations proc_mm_swap_operations = {
+	.llseek		= mem_lseek,
+	.write		= mm_swap_write,
+	.open		= mm_swap_open,
+	.release	= mm_swap_release,
+};
 #endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA diff --git a/include/linux/swap.h b/include/linux/swap.h index aecda6766417a..e7fa800d52ea3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -374,6 +374,11 @@ extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); extern unsigned long vm_total_pages;
+extern unsigned long reclaim_pages(struct list_head *page_list); +extern int add_page_for_swap(struct page *page, struct list_head *pagelist); +extern struct page *get_page_from_vaddr(struct mm_struct *mm, + unsigned long vaddr); + #ifdef CONFIG_SHRINK_PAGECACHE extern unsigned long vm_cache_limit_ratio; extern unsigned long vm_cache_limit_ratio_min; diff --git a/lib/Kconfig b/lib/Kconfig index f332b0a05db26..edb7d40d1f608 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -605,6 +605,11 @@ config ETMEM_SCAN etmem page scan feature used to scan the virtual address of the target process
+config ETMEM_SWAP + tristate "module: etmem page swap for etmem support" + help + etmem page swap feature + config STRING_SELFTEST tristate "Test string functions"
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7cfa9561c2568..92be608b467b6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -36,6 +36,7 @@
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/mempolicy.h>
 #include <linux/compaction.h>
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
@@ -4403,3 +4404,114 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 	}
 }
 #endif /* CONFIG_SHMEM */
+
+/*
+ * Reclaim an arbitrary list of isolated pages, batching runs of pages
+ * that share a NUMA node so each shrink_page_list() call stays per-node.
+ */
+unsigned long reclaim_pages(struct list_head *page_list)
+{
+	int nid = NUMA_NO_NODE;
+	unsigned long nr_reclaimed = 0;
+	LIST_HEAD(node_page_list);
+	struct reclaim_stat dummy_stat;
+	struct page *page;
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.priority = DEF_PRIORITY,
+		.may_writepage = 1,
+		.may_unmap = 1,
+		.may_swap = 1,
+	};
+
+	while (!list_empty(page_list)) {
+		page = lru_to_page(page_list);
+		if (nid == NUMA_NO_NODE) {
+			nid = page_to_nid(page);
+			INIT_LIST_HEAD(&node_page_list);
+		}
+
+		if (nid == page_to_nid(page)) {
+			ClearPageActive(page);
+			list_move(&page->lru, &node_page_list);
+			continue;
+		}
+
+		/* node changed: flush the current batch, then restart */
+		nr_reclaimed += shrink_page_list(&node_page_list,
+						NODE_DATA(nid),
+						&sc, 0,
+						&dummy_stat, false);
+		while (!list_empty(&node_page_list)) {
+			page = lru_to_page(&node_page_list);
+			list_del(&page->lru);
+			putback_lru_page(page);
+		}
+
+		nid = NUMA_NO_NODE;
+	}
+
+	if (!list_empty(&node_page_list)) {
+		nr_reclaimed += shrink_page_list(&node_page_list,
+						NODE_DATA(nid),
+						&sc, 0,
+						&dummy_stat, false);
+		while (!list_empty(&node_page_list)) {
+			page = lru_to_page(&node_page_list);
+			list_del(&page->lru);
+			putback_lru_page(page);
+		}
+	}
+
+	return nr_reclaimed;
+}
+EXPORT_SYMBOL_GPL(reclaim_pages);
+
+/* Isolate @page's compound head from the LRU and queue it on @pagelist. */
+int add_page_for_swap(struct page *page, struct list_head *pagelist)
+{
+	int err = -EBUSY;
+	struct page *head;
+
+	/* if the page is mapped by more than one process, do not swap it */
+	if (page_mapcount(page) > 1)
+		return -EACCES;
+
+	if (PageHuge(page))
+		return -EACCES;
+
+	head = compound_head(page);
+	err = isolate_lru_page(head);
+	if (err) {
+		put_page(page);
+		return err;
+	}
+	put_page(page);
+	if (PageUnevictable(head))
+		putback_lru_page(head);
+	else
+		list_add_tail(&head->lru, pagelist);
+
+	err = 0;
+	return err;
+}
+EXPORT_SYMBOL_GPL(add_page_for_swap);
+/* Look up @vaddr in @mm and return the page with a reference held. */
+struct page *get_page_from_vaddr(struct mm_struct *mm, unsigned long vaddr)
+{
+	struct page *page;
+	struct vm_area_struct *vma;
+	unsigned int follflags;
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, vaddr);
+	if (!vma || vaddr < vma->vm_start || vma->vm_flags & VM_LOCKED) {
+		up_read(&mm->mmap_sem);
+		return NULL;
+	}
+
+	follflags = FOLL_GET | FOLL_DUMP;
+	page = follow_page(vma, vaddr, follflags);
+	if (IS_ERR(page) || !page) {
+		up_read(&mm->mmap_sem);
+		return NULL;
+	}
+
+	up_read(&mm->mmap_sem);
+	return page;
+}
+EXPORT_SYMBOL_GPL(get_page_from_vaddr);