Add anon/file to memory.reclaim interface to limit only reclaim one type pages. The lru algorithm can reclaim cold pages and balance between file and anon. But it didn't consider the speed of backend device. For example, if there is zram device, reclaim anon pages might has less impact on performance. So extend memory.reclaim interface to reclaim one type pages. Usage: "echo <size> type=anon > memory.reclaim" "echo <size> type=file > memory.reclaim"
Also compatible with the previous format.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- Documentation/admin-guide/cgroup-v2.rst | 9 ++++--- include/linux/swap.h | 4 +++ mm/memcontrol.c | 36 ++++++++++++++++++++++--- mm/vmscan.c | 17 ++++++++++++ 4 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 5d9b7e552fb0..a04ca490f58c 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1196,15 +1196,16 @@ PAGE_SIZE multiple when read back. target cgroup.
This file accepts a single key, the number of bytes to reclaim. - No nested keys are currently supported.
Example::
echo "1G" > memory.reclaim
- The interface can be later extended with nested keys to - configure the reclaim behavior. For example, specify the - type of memory to reclaim from (anon, file, ..). + This file also accepts nested keys, the number of bytes to reclaim + with the type of memory to reclaim. + + Example:: + echo "1G type=file" > memory.reclaim
Please note that the kernel can over or under reclaim from the target cgroup. If less bytes are reclaimed than the diff --git a/include/linux/swap.h b/include/linux/swap.h index 7f49964f27d2..b98b4c9df622 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -380,6 +380,10 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, bool may_swap); +extern unsigned long __try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, + unsigned long nr_pages, + gfp_t gfp_mask, + bool may_swap, bool only_swap); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 662c7859b7f1..8f796b651baa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5263,16 +5263,46 @@ static int memcg_events_local_show(struct seq_file *m, void *v) return 0; }
+static int reclaim_param_parse(char *buf, unsigned long *nr_pages, + bool *anon, bool *file) +{ + char *endp; + u64 bytes; + + if (!strcmp(buf, "")) { + *nr_pages = PAGE_COUNTER_MAX; + return 0; + } + + bytes = memparse(buf, &endp); + if (*endp == ' ') { + buf = endp + 1; + buf = strim(buf); + if (!strcmp(buf, "type=anon")) + *file = false; + else if (!strcmp(buf, "type=file")) + *anon = false; + else + return -EINVAL; + } else if (*endp != '\0') + return -EINVAL; + + *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX); + + return 0; +} + static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); unsigned int nr_retries = MAX_RECLAIM_RETRIES; unsigned long nr_to_reclaim, nr_reclaimed = 0; + bool anon = true, file = true; int err;
buf = strstrip(buf); - err = page_counter_memparse(buf, "", &nr_to_reclaim); + err = reclaim_param_parse(buf, &nr_to_reclaim, &anon, &file); if (err) return err;
@@ -5293,9 +5323,9 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, if (!nr_retries) lru_add_drain_all();
- reclaimed = try_to_free_mem_cgroup_pages(memcg, + reclaimed = __try_to_free_mem_cgroup_pages(memcg, nr_to_reclaim - nr_reclaimed, - GFP_KERNEL, true); + GFP_KERNEL, anon, !file);
if (!reclaimed && !nr_retries--) return -EAGAIN; diff --git a/mm/vmscan.c b/mm/vmscan.c index a8412c5d4eda..2fce47af0e83 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -102,6 +102,8 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */ unsigned int may_swap:1; + /* Only swap anon pages */ + unsigned int only_swap:1;
/* * Cgroup memory below memory.low is protected as long as we @@ -2461,6 +2463,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long ap, fp; enum lru_list lru;
+ if (sc->only_swap) { + scan_balance = SCAN_ANON; + goto out; + } + /* If we have no swap space, do not bother scanning anon pages. */ if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { scan_balance = SCAN_FILE; @@ -3563,6 +3570,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, bool may_swap) +{ + return __try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, + may_swap, false); +} + +unsigned long __try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, + unsigned long nr_pages, + gfp_t gfp_mask, + bool may_swap, bool only_swap) { unsigned long nr_reclaimed; unsigned int noreclaim_flag; @@ -3576,6 +3592,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = may_swap, + .only_swap = only_swap, }; /* * Traverse the ZONELIST_FALLBACK zonelist of the current node to put