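Add a new per-memcg interface, memory.force_swapin, that swaps the anonymous and shmem pages of the tasks in a memcg back into memory in advance, so that later accesses do not have to wait for swap-in. The value written is ignored; any write to the file triggers the swap-in. The file is not available on the root memcg. Usage: # echo 0 > memory.force_swapin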
Signed-off-by: Liu Shixin liushixin2@huawei.com --- .../admin-guide/cgroup-v1/memory.rst | 1 + include/linux/mm.h | 1 + mm/Kconfig | 8 ++++ mm/madvise.c | 19 ++++++++++ mm/memcontrol.c | 38 +++++++++++++++++++ 5 files changed, 67 insertions(+)
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 6859a50fbd09..5eee7e3be4b2 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -78,6 +78,7 @@ Brief summary of control files. memory.stat show various statistics memory.use_hierarchy set/show hierarchical account enabled memory.force_empty trigger forced page reclaim + memory.force_swapin trigger forced swap-in of anon/shmem pages memory.pressure_level set memory pressure notifications memory.swappiness set/show swappiness parameter of vmscan (See sysctl's vm.swappiness) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0b5ce84212d7..58d7a59b5b65 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2650,6 +2650,7 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); +extern void force_swapin_vma(struct vm_area_struct *vma); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
extern unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, diff --git a/mm/Kconfig b/mm/Kconfig index f66457168de9..4c7569970c69 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -512,6 +512,14 @@ config MEMCG_QOS
If unsure, say "n".
+config MEMCG_SWAP_QOS
+	bool "Enable Memory Cgroup Swap priority"
+	depends on MEMCG_SWAP
+	depends on X86 || ARM64
+	default n
+	help
+	  Support swapin memory for memcg.
+
 config ETMEM_SCAN
 	tristate "module: etmem page scan for etmem support"
 	depends on ETMEM
diff --git a/mm/madvise.c b/mm/madvise.c
index 0a1d6f9d75ea..6028383a8147 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -259,6 +259,38 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,

 	lru_add_drain();	/* Push any new pages onto the LRU now */
 }
+
+/*
+ * Swap in the pages backing @vma ahead of access.
+ *
+ * VMAs rejected by can_madv_lru_vma() are left alone. An anonymous VMA (no
+ * backing file) is walked with swapin_walk_ops; a shmem-backed VMA goes
+ * through force_shm_swapin_readahead() over its whole range. Any other
+ * file-backed VMA is ignored.
+ *
+ * The only caller visible in this patch, mem_cgroup_task_swapin(), holds the
+ * mmap read lock of the owning mm across the call.
+ */
+void force_swapin_vma(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+
+	if (!can_madv_lru_vma(vma))
+		return;
+
+	if (!file) {
+		walk_page_vma(vma, &swapin_walk_ops, vma);
+		lru_add_drain();
+	} else if (shmem_mapping(file->f_mapping)) {
+		force_shm_swapin_readahead(vma, vma->vm_start,
+					   vma->vm_end, file->f_mapping);
+	}
+}
+#else
+/* Without CONFIG_SWAP there is nothing to swap in. */
+void force_swapin_vma(struct vm_area_struct *vma)
+{
+}
 #endif		/* CONFIG_SWAP */
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8f796b651baa..10e38edf6b87 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5336,6 +5336,61 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 	return nbytes;
 }

+#ifdef CONFIG_MEMCG_SWAP_QOS
+/*
+ * Swap in every VMA of @task's address space.
+ *
+ * Callback passed to mem_cgroup_scan_tasks(); @arg is unused. Always returns
+ * 0, including for tasks that are skipped.
+ */
+static int mem_cgroup_task_swapin(struct task_struct *task, void *arg)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct blk_plug plug;
+
+	/*
+	 * task->mm is NULL for kernel threads and for tasks that have already
+	 * released their address space, so it must not be dereferenced
+	 * directly. get_task_mm() returns NULL in those cases and otherwise
+	 * pins the mm until mmput(), so it cannot go away during the walk.
+	 */
+	mm = get_task_mm(task);
+	if (!mm)
+		return 0;
+
+	mmap_read_lock(mm);
+	blk_start_plug(&plug);
+	for (vma = mm->mmap; vma; vma = vma->vm_next)
+		force_swapin_vma(vma);
+	blk_finish_plug(&plug);
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	return 0;
+}
+
+/*
+ * Write handler for memory.force_swapin.
+ *
+ * Passes mem_cgroup_task_swapin() to mem_cgroup_scan_tasks() for the memcg
+ * the file belongs to. The written data in @buf is ignored. Returns -EINVAL
+ * for the root memcg, otherwise @nbytes.
+ */
+static ssize_t memory_swapin(struct kernfs_open_file *of, char *buf,
+			     size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+
+	if (unlikely(mem_cgroup_is_root(memcg)))
+		return -EINVAL;
+
+	mem_cgroup_scan_tasks(memcg, mem_cgroup_task_swapin, NULL);
+
+	return nbytes;
+}
+#endif
+
 static int memcg_high_async_ratio_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%d\n",
@@ -5738,6 +5793,13 @@ static struct cftype mem_cgroup_legacy_files[] = {
 		.name = "reclaim",
 		.write = memory_reclaim,
 	},
+#ifdef CONFIG_MEMCG_SWAP_QOS
+	{
+		.name = "force_swapin",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.write = memory_swapin,
+	},
+#endif
 	{
 		.name = "high_async_ratio",
 		.flags = CFTYPE_NOT_ON_ROOT,