On 2024/1/9 18:58, Yuchen Tang wrote:
euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8T1MB?from=project-issue
CVE: NA
This patch implements the etmem swap feature.
etmem swap, like etmem scan, also communicates with the user-space program through a registered proc file system. It adds target pages to the swap cache to be further reclaimed by kswapd, after which they dwell in swap space.
Signed-off-by: yanxiaodan <yanxiaodan@huawei.com>
Signed-off-by: linmiaohe <linmiaohe@huawei.com>
Signed-off-by: louhongxiang <louhongxiang@huawei.com>
Signed-off-by: liubo <liubo254@huawei.com>
Signed-off-by: geruijun <geruijun@huawei.com>
Signed-off-by: Yuchen Tang <tangyuchen5@huawei.com>
fs/proc/Makefile | 5 +- fs/proc/base.c | 2 + fs/proc/etmem_proc.c | 94 ++++++++++++++++++++++++++++++++++++ fs/proc/etmem_swap.c | 109 ++++++++++++++++++++++++++++++++++++++++++ fs/proc/internal.h | 1 + include/linux/etmem.h | 13 +++++ mm/Kconfig | 10 ++++ mm/Makefile | 1 + mm/etmem.c | 64 +++++++++++++++++++++++++ 9 files changed, 297 insertions(+), 2 deletions(-) create mode 100644 fs/proc/etmem_swap.c create mode 100644 mm/etmem.c
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index b9a7bc7d8a75..fe283f354d61 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -34,5 +34,6 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o -obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o -proc-${CONFIG_ETMEM} += etmem_proc.o +obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o +obj-$(CONFIG_ETMEM_SWAP) += etmem_swap.o +proc-${CONFIG_ETMEM} += etmem_proc.o
一直没注意这个地方,为啥改,对齐? 那之前补丁对齐不对吗
diff --git a/fs/proc/base.c b/fs/proc/base.c index eb5ab83e5c22..5be0273a70c4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3357,6 +3357,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif #ifdef CONFIG_ETMEM REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations),
- REG("swap_pages", S_IWUSR, proc_mm_swap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -3709,6 +3710,7 @@ static const struct pid_entry tid_base_stuff[] = { #endif #ifdef CONFIG_ETMEM REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations),
- REG("swap_pages", S_IWUSR, proc_mm_swap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/etmem_proc.c b/fs/proc/etmem_proc.c index edacb9260345..2e6712cc43b2 100644 --- a/fs/proc/etmem_proc.c +++ b/fs/proc/etmem_proc.c @@ -120,3 +120,97 @@ const struct file_operations proc_mm_idle_operations = { .release = mm_idle_release, .unlocked_ioctl = mm_idle_ioctl, };
+static DEFINE_SPINLOCK(swap_lock);
+static int page_swap_lock(struct file *file, int is_lock, struct file_lock *flock) +{
- if (is_lock)
spin_lock(&swap_lock);
- else
spin_unlock(&swap_lock);
- return 0;
+} +/*swap pages*/ +struct file_operations proc_swap_pages_operations = {
- .flock = page_swap_lock,
+}; +EXPORT_SYMBOL_GPL(proc_swap_pages_operations);
+static ssize_t mm_swap_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
+{
- if (proc_swap_pages_operations.write)
return proc_swap_pages_operations.write(file, buf, count, ppos);
- return -1;
+}
+static int mm_swap_open(struct inode *inode, struct file *file) +{
- struct mm_struct *mm = NULL;
- struct module *module = NULL;
- int ret = -1;
- if (!file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN))
return -EPERM;
- page_swap_lock(NULL, 1, NULL);
- module = proc_swap_pages_operations.owner;
- if (module != NULL && try_module_get(module))
ret = 0;
- page_swap_lock(NULL, 0, NULL);
- if (ret != 0) {
/* no swap ko installed, avoid to return valid file */
return -ENODEV;
- }
- mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(mm)) {
module_put(module);
return PTR_ERR(mm);
- }
- file->private_data = mm;
- if (proc_swap_pages_operations.open)
ret = proc_swap_pages_operations.open(inode, file);
- if (ret != 0)
module_put(module);
- return ret;
+}
+static int mm_swap_release(struct inode *inode, struct file *file) +{
- struct mm_struct *mm = file->private_data;
- int ret = 0;
- if (mm)
mmdrop(mm);
- if (proc_swap_pages_operations.release)
ret = proc_swap_pages_operations.release(inode, file);
- if (proc_swap_pages_operations.owner)
module_put(proc_swap_pages_operations.owner);
- return ret;
+}
+static long mm_swap_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{
- if (proc_swap_pages_operations.unlocked_ioctl)
return proc_swap_pages_operations.unlocked_ioctl(filp, cmd, arg);
- return 0;
+}
+const struct file_operations proc_mm_swap_operations = {
- .llseek = mem_lseek,
- .write = mm_swap_write,
- .open = mm_swap_open,
- .release = mm_swap_release,
- .unlocked_ioctl = mm_swap_ioctl,
+}; diff --git a/fs/proc/etmem_swap.c b/fs/proc/etmem_swap.c new file mode 100644 index 000000000000..4aad6b9db9a6 --- /dev/null +++ b/fs/proc/etmem_swap.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/proc_fs.h> +#include <linux/sched/mm.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/mempolicy.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/etmem.h>
+static ssize_t swap_pages_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
+{
- char *p, *data, *data_ptr_res;
- unsigned long vaddr;
- struct mm_struct *mm = file->private_data;
- struct page *page;
- LIST_HEAD(pagelist);
- int ret = 0;
- if (!mm || !mmget_not_zero(mm)) {
ret = -ESRCH;
goto out;
- }
- if (count < 0) {
ret = -EOPNOTSUPP;
goto out_mm;
- }
- data = memdup_user_nul(buf, count);
- if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto out_mm;
- }
- data_ptr_res = data;
- while ((p = strsep(&data, "\n")) != NULL) {
if (!*p)
continue;
ret = kstrtoul(p, 16, &vaddr);
if (ret != 0)
continue;
/* If get page struct failed, ignore it, get next page */
page = get_page_from_vaddr(mm, vaddr);
if (!page)
continue;
add_page_for_swap(page, &pagelist);
- }
- if (!list_empty(&pagelist))
reclaim_pages(&pagelist);
- ret = count;
- kfree(data_ptr_res);
+out_mm:
- mmput(mm);
+out:
- return ret;
+}
+static int swap_pages_open(struct inode *inode, struct file *file) +{
- if (!try_module_get(THIS_MODULE))
return -EBUSY;
- return 0;
+}
+static int swap_pages_release(struct inode *inode, struct file *file) +{
- module_put(THIS_MODULE);
- return 0;
+}
+extern struct file_operations proc_swap_pages_operations;
+static int swap_pages_entry(void) +{
- proc_swap_pages_operations.flock(NULL, 1, NULL);
- proc_swap_pages_operations.owner = THIS_MODULE;
- proc_swap_pages_operations.write = swap_pages_write;
- proc_swap_pages_operations.open = swap_pages_open;
- proc_swap_pages_operations.release = swap_pages_release;
- proc_swap_pages_operations.flock(NULL, 0, NULL);
- return 0;
+}
+static void swap_pages_exit(void) +{
- proc_swap_pages_operations.flock(NULL, 1, NULL);
- proc_swap_pages_operations.owner = NULL;
- proc_swap_pages_operations.write = NULL;
- proc_swap_pages_operations.open = NULL;
- proc_swap_pages_operations.release = NULL;
- proc_swap_pages_operations.flock(NULL, 0, NULL);
+}
+MODULE_LICENSE("GPL"); +module_init(swap_pages_entry); +module_exit(swap_pages_exit); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a9615455b709..be6d5dfc330c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -305,6 +305,7 @@ extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; #ifdef CONFIG_ETMEM extern const struct file_operations proc_mm_idle_operations; +extern const struct file_operations proc_mm_swap_operations; #endif
extern unsigned long task_vsize(struct mm_struct *); diff --git a/include/linux/etmem.h b/include/linux/etmem.h index e8a2585f3891..5ebd1c3274b7 100644 --- a/include/linux/etmem.h +++ b/include/linux/etmem.h @@ -22,6 +22,19 @@ static inline struct kvm *mm_kvm(struct mm_struct *mm) } #endif
+extern int add_page_for_swap(struct page *page, struct list_head *pagelist); +extern struct page *get_page_from_vaddr(struct mm_struct *mm,
unsigned long vaddr);
+#else /* !CONFIG_ETMEM */ +static inline int add_page_for_swap(struct page *page, struct list_head *pagelist) +{
- return 0;
+}
+static inline struct page *get_page_from_vaddr(struct mm_struct *mm,
unsigned long vaddr)
+{
- return NULL;
+} #endif /* #ifdef CONFIG_ETMEM */ #endif /* define __MM_ETMEM_H_ */ diff --git a/mm/Kconfig b/mm/Kconfig index 980eaf75d87b..57818763e3c7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1359,6 +1359,16 @@ config ETMEM_SCAN scan results back to user space. etmem scan also supports virtual machine
+config ETMEM_SWAP
- tristate "module: etmem page swap for etmem support"
- depends on ETMEM
- help
etmem swap is a critical component of the etmem feature.
When using etmem slide engine, etmem_swap.ko will add appointed pages
(ideally all of which are rarely used, "cold" pages) to swapcache
proactively, which will later be reclaimed and added to swap space,
making room for more frequently used, "hot" pages.
- config ETMEM bool "Enable etmem feature" depends on MMU
diff --git a/mm/Makefile b/mm/Makefile index 6759053ed782..3d15ba814dd7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -142,5 +142,6 @@ obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o obj-$(CONFIG_SHARE_POOL) += share_pool.o obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o +obj-$(CONFIG_ETMEM) += etmem.o obj-$(CONFIG_PAGE_CACHE_LIMIT) += page_cache_limit.o obj-$(CONFIG_CLEAR_FREELIST_PAGE) += clear_freelist_page.o diff --git a/mm/etmem.c b/mm/etmem.c new file mode 100644 index 000000000000..9a89bfcc1058 --- /dev/null +++ b/mm/etmem.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/memcontrol.h> +#include <linux/gfp.h> +#include <linux/mm_inline.h> +#include <linux/sysctl.h> +#include <linux/etmem.h> +#include "internal.h"
+int add_page_for_swap(struct page *page, struct list_head *pagelist) +{
- int err = -EBUSY;
- struct page *head;
struct folio *folio;
- /* If the page is mapped by more than one process, do not swap it */
- if (page_mapcount(page) > 1)
return -EACCES;
folio = page_folio(page);
if (folio_test_hugetlb)
	return xx;
if (!folio_isolate_lru(folio)) {
	folio_put();
}
xxx
包括下面的page函数或者api
这个函数应该都转成folio的;不过随你们了;
- if (PageHuge(page))
return -EACCES;
- head = compound_head(page);
- if (!folio_isolate_lru(page_folio(head))) {
put_page(page);
return err;
- }
- put_page(page);
- if (PageUnevictable(page))
putback_lru_page(page);
- else
list_add_tail(&head->lru, pagelist);
- err = 0;
- return err;
+} +EXPORT_SYMBOL_GPL(add_page_for_swap);
+struct page *get_page_from_vaddr(struct mm_struct *mm, unsigned long vaddr) +{
- struct page *page;
- struct vm_area_struct *vma;
- unsigned int follflags;
- mmap_read_lock(mm);
- vma = find_vma(mm, vaddr);
- if (!vma || vaddr < vma->vm_start || vma->vm_flags & VM_LOCKED) {
mmap_read_unlock(mm);
return NULL;
- }
- follflags = FOLL_GET | FOLL_DUMP;
- page = follow_page(vma, vaddr, follflags);
- if (IS_ERR(page) || !page) {
mmap_read_unlock(mm);
return NULL;
- }
- mmap_read_unlock(mm);
- return page;
+} +EXPORT_SYMBOL_GPL(get_page_from_vaddr);