From: liubo <liubo254@huawei.com>
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4QVXW CVE: NA
------------------------------------------------- etmem is a memory vertical expansion technology.
By default, the existing memory expansion tool etmem swaps out every page of a process that can be swapped out, unless the page is marked with the lock flag.
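This patch adds the ability to swap out only specified pages. The process sets the VM_SWAPFLAG flag (via madvise with MADV_SWAPFLAG) on the memory areas whose pages are to be swapped out. etmem adds a filter to its scanning module so that, when the filter is enabled, only these pages are scanned and swapped out.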
Signed-off-by: liubo <liubo254@huawei.com> Reviewed-by: Miaohe Lin <linmiaohe@huawei.com> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> --- fs/proc/etmem_scan.c | 32 ++++++++++++++++++++++++++ fs/proc/etmem_scan.h | 9 ++++++++ fs/proc/etmem_swap.c | 1 + fs/proc/task_mmu.c | 9 ++++++++ include/linux/mm.h | 2 ++ include/uapi/asm-generic/mman-common.h | 3 +++ mm/madvise.c | 9 +++++++- 7 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/fs/proc/etmem_scan.c b/fs/proc/etmem_scan.c index 1650208bad4c8..a436fa9280bb8 100644 --- a/fs/proc/etmem_scan.c +++ b/fs/proc/etmem_scan.c @@ -923,6 +923,11 @@ static int mm_idle_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; + struct page_idle_ctrl *pic = walk->private; + + /* If the specified page swapout is set, the untagged vma is skipped. */ + if ((pic->flags & VMA_SCAN_FLAG) && !(vma->vm_flags & VM_SWAPFLAG)) + return 1;
if (vma->vm_file) { if ((vma->vm_flags & (VM_WRITE|VM_MAYSHARE)) == VM_WRITE) @@ -1025,6 +1030,31 @@ static ssize_t mm_idle_read(struct file *file, char *buf, return ret; }
+static long page_scan_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + unsigned int flags; + + if (get_user(flags, (unsigned int __user *)argp)) + return -EFAULT; + + flags &= ALL_SCAN_FLAGS; + + switch (cmd) { + case VMA_SCAN_ADD_FLAGS: + filp->f_flags |= flags; + break; + case VMA_SCAN_REMOVE_FLAGS: + filp->f_flags &= ~flags; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + extern struct file_operations proc_page_scan_operations;
static int page_scan_entry(void) @@ -1034,6 +1064,7 @@ static int page_scan_entry(void) proc_page_scan_operations.read = page_scan_read; proc_page_scan_operations.open = page_scan_open; proc_page_scan_operations.release = page_scan_release; + proc_page_scan_operations.unlocked_ioctl = page_scan_ioctl; proc_page_scan_operations.flock(NULL, 0, NULL);
return 0; @@ -1046,6 +1077,7 @@ static void page_scan_exit(void) proc_page_scan_operations.read = NULL; proc_page_scan_operations.open = NULL; proc_page_scan_operations.release = NULL; + proc_page_scan_operations.unlocked_ioctl = NULL; proc_page_scan_operations.flock(NULL, 0, NULL); }
diff --git a/fs/proc/etmem_scan.h b/fs/proc/etmem_scan.h index 305739f92eef2..5deb7fb02f059 100644 --- a/fs/proc/etmem_scan.h +++ b/fs/proc/etmem_scan.h @@ -6,6 +6,15 @@ #define SCAN_SKIM_IDLE O_NOFOLLOW /* stop on PMD_IDLE_PTES */ #define SCAN_DIRTY_PAGE O_NOATIME /* report pte/pmd dirty bit */
+/* define to not used file flags */ +#define VMA_SCAN_FLAG 0x1000 /* scan the specifics vma with flag */ + +#define ALL_SCAN_FLAGS (SCAN_HUGE_PAGE | SCAN_SKIM_IDLE | SCAN_DIRTY_PAGE | VMA_SCAN_FLAG) + +#define IDLE_SCAN_MAGIC 0x66 +#define VMA_SCAN_ADD_FLAGS _IOW(IDLE_SCAN_MAGIC, 0x2, unsigned int) +#define VMA_SCAN_REMOVE_FLAGS _IOW(IDLE_SCAN_MAGIC, 0x3, unsigned int) + enum ProcIdlePageType { PTE_ACCESSED, /* 4k page */ PMD_ACCESSED, /* 2M page */ diff --git a/fs/proc/etmem_swap.c b/fs/proc/etmem_swap.c index aef9f9952848c..6eb422f5f3a34 100644 --- a/fs/proc/etmem_swap.c +++ b/fs/proc/etmem_swap.c @@ -63,6 +63,7 @@ static ssize_t swap_pages_write(struct file *file, const char __user *buf, ret = kstrtoul(p, 16, &vaddr); if (ret != 0) continue; + /* If get page struct failed, ignore it, get next page */ page = get_page_from_vaddr(mm, vaddr); if (!page) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4b324d102f512..495044e1990bd 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1829,11 +1829,20 @@ static int mm_idle_release(struct inode *inode, struct file *file) return ret; }
+static long mm_idle_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + if (proc_page_scan_operations.unlocked_ioctl) + return proc_page_scan_operations.unlocked_ioctl(filp, cmd, arg); + + return 0; +} + const struct file_operations proc_mm_idle_operations = { .llseek = mem_lseek, /* borrow this */ .read = mm_idle_read, .open = mm_idle_open, .release = mm_idle_release, + .unlocked_ioctl = mm_idle_ioctl, };
static DEFINE_SPINLOCK(swap_lock); diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b724d39e6ee0..be0be448c3f19 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -251,6 +251,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_USWAP 0x2000000000000000 #endif
+#define VM_SWAPFLAG 0x400000000000000 /* memory swap out flag in vma */ + #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index e7ee32861d51d..58e55857258f0 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -66,6 +66,9 @@ #define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+#define MADV_SWAPFLAG 203 /* memory swap flag, for memory to be swap out */ +#define MADV_SWAPFLAG_REMOVE 204 + /* compatibility flags */ #define MAP_FILE 0
diff --git a/mm/madvise.c b/mm/madvise.c index 1317267807b19..242a88ae3acf1 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -129,6 +129,12 @@ static long madvise_behavior(struct vm_area_struct *vma, goto out; } break; + case MADV_SWAPFLAG: + new_flags |= VM_SWAPFLAG; + break; + case MADV_SWAPFLAG_REMOVE: + new_flags &= ~VM_SWAPFLAG; + break; }
if (new_flags == vma->vm_flags) { @@ -740,8 +746,9 @@ madvise_behavior_valid(int behavior) case MADV_SOFT_OFFLINE: case MADV_HWPOISON: #endif + case MADV_SWAPFLAG: + case MADV_SWAPFLAG_REMOVE: return true; - default: return false; }