From: Liu Shixin liushixin2@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8QK6Q CVE: NA
--------------------------------
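Add a new per-memcg interface, memory.force_swapin, that swaps the anonymous and shmem pages of the tasks in a memory cgroup back into memory in advance, to improve later access efficiency. The written value is not interpreted; any write triggers the swap-in. Usage: # echo 0 > memory.force_swapin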
Signed-off-by: Liu Shixin liushixin2@huawei.com Signed-off-by: Jinjiang Tu tujinjiang@huawei.com --- .../admin-guide/cgroup-v1/memory.rst | 1 + include/linux/mm.h | 4 ++ mm/madvise.c | 31 ++++++++++--- mm/memcontrol.c | 45 +++++++++++++++++++ 4 files changed, 76 insertions(+), 5 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 66ae60dead2e..9bbd489136b2 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -83,6 +83,7 @@ Brief summary of control files. This knob is deprecated and shouldn't be used. memory.force_empty trigger forced page reclaim + memory.force_swapin trigger forced swap-in of anon and shmem pages memory.pressure_level set memory pressure notifications memory.swappiness set/show swappiness parameter of vmscan (See sysctl's vm.swappiness) diff --git a/include/linux/mm.h b/include/linux/mm.h index 80bacc4da324..f078aa6b493c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3313,6 +3313,10 @@ extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
+#ifdef CONFIG_MEMCG_SWAP_QOS +extern void force_swapin_vma(struct vm_area_struct *vma); +#endif + #ifdef CONFIG_MMU extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, diff --git a/mm/madvise.c b/mm/madvise.c index 4dded5d27e7e..2d56815daff2 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -185,6 +185,11 @@ static int madvise_update_vma(struct vm_area_struct *vma, return 0; }
+/* True when @vma has none of VM_LOCKED, VM_PFNMAP or VM_HUGETLB set. */
+static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
+{
+	const unsigned long excluded = VM_LOCKED | VM_PFNMAP | VM_HUGETLB;
+
+	return (vma->vm_flags & excluded) == 0;
+}
+
 #ifdef CONFIG_SWAP
 static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 		unsigned long end, struct mm_walk *walk)
@@ -273,6 +278,27 @@ static void shmem_swapin_range(struct vm_area_struct *vma,
 }
 #endif /* CONFIG_SWAP */
+#ifdef CONFIG_MEMCG_SWAP_QOS
+/*
+ * Swap in @vma via swapin_walk_ops (anonymous) or shmem_swapin_range() (shmem).
+ * No !CONFIG_MEMCG_SWAP_QOS stub: mm.h declares this only with the option set.
+ */
+void force_swapin_vma(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+
+	if (!can_madv_lru_vma(vma))
+		return;
+	if (!file) {
+		walk_page_vma(vma, &swapin_walk_ops, vma);
+		lru_add_drain();
+	} else if (shmem_mapping(file->f_mapping)) {
+		shmem_swapin_range(vma, vma->vm_start, vma->vm_end,
+				   file->f_mapping);
+	}
+}
+#endif
+
 /*
  * Schedule all required I/O operations. Do not wait for completion.
  */
@@ -555,11 +581,6 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
 	tlb_end_vma(tlb, vma);
 }
-static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
-{
-	return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP|VM_HUGETLB));
-}
-
 static long madvise_cold(struct vm_area_struct *vma,
 			struct vm_area_struct **prev,
 			unsigned long start_addr, unsigned long end_addr)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 98fea5dacd40..deebbfcd5bf6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -66,6 +66,11 @@
 #include <linux/memcg_memfs_info.h>
 #include <linux/sched/isolation.h>
 #include <linux/parser.h>
+
+#ifdef CONFIG_MEMCG_SWAP_QOS
+#include <linux/blkdev.h>
+#endif
+
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -4271,6 +4276,58 @@ static int sysctl_memcg_swap_qos_handler(struct ctl_table *table, int write,
 	return 0;
 }
 #endif
+
+/*
+ * mem_cgroup_scan_tasks() callback: pass every VMA of @task's address space
+ * to force_swapin_vma(). @arg is unused. Always returns 0.
+ */
+static int mem_cgroup_task_swapin(struct task_struct *task, void *arg)
+{
+	struct vm_area_struct *vma;
+	struct vma_iterator vmi;
+	struct mm_struct *mm;
+	struct blk_plug plug;
+
+	if (__task_is_dying(task))
+		return 0;
+
+	/*
+	 * get_task_mm() reads task->mm under task_lock() and takes the
+	 * mm_users reference there, so the mm cannot go away between the
+	 * load and the pin as it could with a bare task->mm read followed
+	 * by mmget_not_zero(). It returns NULL when there is no user mm.
+	 */
+	mm = get_task_mm(task);
+	if (!mm)
+		return 0;
+
+	mmap_read_lock(mm);
+	/* Set up the iterator only once the mm is known to be valid. */
+	vma_iter_init(&vmi, mm, 0);
+	blk_start_plug(&plug);
+	for_each_vma(vmi, vma)
+		force_swapin_vma(vma);
+	blk_finish_plug(&plug);
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	return 0;
+}
+
+/*
+ * Write handler for memory.force_swapin: run mem_cgroup_task_swapin() through
+ * mem_cgroup_scan_tasks() for the memcg being written to. The written data is
+ * not parsed, so any write triggers the scan; @nbytes is returned as consumed.
+ */
+static ssize_t memory_swapin(struct kernfs_open_file *of, char *buf,
+				size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+
+	mem_cgroup_scan_tasks(memcg, mem_cgroup_task_swapin, NULL);
+
+	return nbytes;
+}
 #endif
#ifdef CONFIG_NUMA @@ -5762,6 +5800,13 @@ static struct cftype mem_cgroup_legacy_files[] = { .write = memory_ksm_write, .seq_show = memory_ksm_show, }, +#endif +#ifdef CONFIG_MEMCG_SWAP_QOS + { + .name = "force_swapin", + .flags = CFTYPE_NOT_ON_ROOT, + .write = memory_swapin, + }, #endif { }, /* terminate */ };