From: Liu Shixin liushixin2@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8QK6Q CVE: NA
--------------------------------
Add an anon/file argument to the memory.reclaim interface to limit reclaim to only one type of pages. The LRU algorithm can reclaim cold pages and balance between file and anon pages, but it doesn't consider the speed of the backing device. For example, if there is a zram device, reclaiming anon pages might have less impact on performance. So extend the memory.reclaim interface to reclaim only one type of pages. Usage: "echo <size> type=anon > memory.reclaim" "echo <size> type=file > memory.reclaim"
The interface remains backward compatible with the previous format.
Signed-off-by: Liu Shixin liushixin2@huawei.com Signed-off-by: Jinjiang Tu tujinjiang@huawei.com --- Documentation/admin-guide/cgroup-v2.rst | 10 ++-- include/linux/swap.h | 1 + mm/memcontrol.c | 67 +++++++++++++++++++++++-- mm/vmscan.c | 9 ++++ 4 files changed, 80 insertions(+), 7 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index b26b5274eaaf..84cbbeaf0d78 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1247,15 +1247,17 @@ PAGE_SIZE multiple when read back. target cgroup.
This file accepts a single key, the number of bytes to reclaim. - No nested keys are currently supported.
Example::
echo "1G" > memory.reclaim
- The interface can be later extended with nested keys to - configure the reclaim behavior. For example, specify the - type of memory to reclaim from (anon, file, ..). + This file also accepts nested keys, the number of bytes to reclaim + with the type of memory to reclaim. + + Example:: + echo "1G type=file" > memory.reclaim + echo "1G type=anon" > memory.reclaim
Please note that the kernel can over or under reclaim from the target cgroup. If less bytes are reclaimed than the diff --git a/include/linux/swap.h b/include/linux/swap.h index fe20c462fecb..1c4c86812e96 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -419,6 +419,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#define MEMCG_RECLAIM_MAY_SWAP (1 << 1) #define MEMCG_RECLAIM_PROACTIVE (1 << 2) +#define MEMCG_RECLAIM_NOT_FILE (1 << 3) extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2e80504a49c0..aab3ecea6847 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -65,6 +65,7 @@ #include <linux/seq_buf.h> #include <linux/memcg_memfs_info.h> #include <linux/sched/isolation.h> +#include <linux/parser.h> #include "internal.h" #include <net/sock.h> #include <net/ip.h> @@ -7308,6 +7309,62 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; }
+enum { + MEMORY_RECLAIM_TYPE = 0, + MEMORY_RECLAIM_NULL, +}; + +static const match_table_t tokens = { + { MEMORY_RECLAIM_TYPE, "type=%s"}, + { MEMORY_RECLAIM_NULL, NULL }, +}; + +#define RECLAIM_TYPE_SIZE 8 + +static int reclaim_param_parse(char *buf, unsigned long *nr_pages, + unsigned int *reclaim_options) +{ + char *old_buf, *start; + char type[RECLAIM_TYPE_SIZE]; + substring_t args[MAX_OPT_ARGS]; + u64 bytes; + + buf = strstrip(buf); + if (!strcmp(buf, "")) { + *nr_pages = PAGE_COUNTER_MAX; + return 0; + } + + old_buf = buf; + bytes = memparse(buf, &buf); + if (buf == old_buf) + return -EINVAL; + + *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX); + + buf = strstrip(buf); + while ((start = strsep(&buf, " ")) != NULL) { + if (!strlen(start)) + continue; + + switch (match_token(start, tokens, args)) { + case MEMORY_RECLAIM_TYPE: + match_strlcpy(type, &args[0], RECLAIM_TYPE_SIZE); + if (!strcmp(type, "anon")) + *reclaim_options |= MEMCG_RECLAIM_NOT_FILE; + else if (!strcmp(type, "file")) + *reclaim_options &= ~MEMCG_RECLAIM_MAY_SWAP; + else + return -EINVAL; + break; + default: + return -EINVAL; + } + } + + return 0; +} + static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -7317,18 +7374,22 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, unsigned int reclaim_options; int err;
- buf = strstrip(buf); - err = page_counter_memparse(buf, "", &nr_to_reclaim); + reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; + err = reclaim_param_parse(buf, &nr_to_reclaim, &reclaim_options); if (err) return err;
- reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; while (nr_reclaimed < nr_to_reclaim) { unsigned long reclaimed;
if (signal_pending(current)) return -EINTR;
+ /* If only reclaim swap pages, check swap space at first. */ + if ((reclaim_options & MEMCG_RECLAIM_NOT_FILE) && + (mem_cgroup_get_nr_swap_pages(memcg) <= 0)) + return -EAGAIN; + /* * This is the final attempt, drain percpu lru caches in the * hope of introducing more evictable pages for diff --git a/mm/vmscan.c b/mm/vmscan.c index 7a676296af30..6461552c81d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -109,6 +109,9 @@ struct scan_control { /* Can folios be swapped as part of reclaim? */ unsigned int may_swap:1;
+ /* Should skip file pages? */ + unsigned int not_file:1; + /* Proactive reclaim invoked by userspace through memory.reclaim */ unsigned int proactive:1;
@@ -3035,6 +3038,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long ap, fp; enum lru_list lru;
+ if (sc->not_file) { + scan_balance = SCAN_ANON; + goto out; + } + /* If we have no swap space, do not bother scanning anon folios. */ if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) { scan_balance = SCAN_FILE; @@ -7141,6 +7149,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP), .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE), + .not_file = !!(reclaim_options & MEMCG_RECLAIM_NOT_FILE), }; /* * Traverse the ZONELIST_FALLBACK zonelist of the current node to put