hulk inclusion
category: feature
bugzilla: NA
--------------------------------
Add memory.file_high to memcg v1, which is similar to memory.high. However,
reclaim is only triggered when the page cache usage of a memcg rises above
memory.file_high, and only reclaimable page cache is reclaimed.
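A minimal usage sketch (the cgroup name and the limit value below are
illustrative, assuming the v1 memory controller is mounted at
/sys/fs/cgroup/memory):

  # mkdir /sys/fs/cgroup/memory/test
  # echo 100M > /sys/fs/cgroup/memory/test/memory.file_high

Writing "max" (the default) disables the limit. When the page cache of the
group grows above the configured value, page cache reclaim is triggered when
the limit is written, from the memcg high work, and on return to userspace;
each ancestor memcg is also checked against its own memory.file_high.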
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
---
 include/linux/memcontrol.h |  3 +-
 include/linux/tracehook.h  |  1 +
 mm/memcontrol.c            | 72 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 72 insertions(+), 4 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6b63b39cc24d..017865199e83 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -406,7 +406,7 @@ struct mem_cgroup {
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 #endif
-	KABI_RESERVE(5)
+	KABI_USE(5, unsigned long file_high)
 #if defined(CONFIG_DYNAMIC_HUGETLB) && defined(CONFIG_ARM64)
 	KABI_USE(6, struct dhugetlb_pool *hpool)
 #else
@@ -1017,6 +1017,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
 }
 
 void mem_cgroup_handle_over_high(void);
+void mem_cgroup_handle_over_file_high(void);
 
 unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
 
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 8280243be3c8..7f945b76c306 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -195,6 +195,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 #endif
 
 	mem_cgroup_handle_over_high();
+	mem_cgroup_handle_over_file_high();
 	blkcg_maybe_throttle_current();
 #ifdef CONFIG_QOS_SCHED
 	sched_qos_offline_wait();
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index aefaa33147b1..8d4d9c4ee8e5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -736,6 +736,24 @@ static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 	return x;
 }
 
+static unsigned long memcg_file_limit(struct mem_cgroup *memcg)
+{
+	unsigned long nr_pages, file_high;
+	unsigned long nr_reclaimed = 0;
+
+	mem_cgroup_flush_stats();
+	do {
+		nr_pages = memcg_page_state(memcg, NR_FILE_PAGES);
+		file_high = READ_ONCE(memcg->file_high);
+		if (nr_pages > file_high)
+			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages - file_high,
+								     GFP_KERNEL, 0);
+	} while ((memcg = parent_mem_cgroup(memcg)) &&
+		 !mem_cgroup_is_root(memcg));
+
+	return nr_reclaimed;
+}
+
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
 static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
@@ -2441,6 +2459,7 @@ static void high_work_func(struct work_struct *work)
 	else
 #endif
 		reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
+	memcg_file_limit(memcg);
 }
 
 /*
@@ -2666,6 +2685,19 @@ void mem_cgroup_handle_over_high(void)
 	css_put(&memcg->css);
 }
 
+void mem_cgroup_handle_over_file_high(void)
+{
+	struct mem_cgroup *memcg;
+	int nr_retries = MAX_RECLAIM_RETRIES;
+	unsigned long nr_reclaimed = 0;
+
+	memcg = get_mem_cgroup_from_mm(current->mm);
+retry_reclaim:
+	nr_reclaimed = memcg_file_limit(memcg);
+	if (nr_reclaimed || nr_retries--)
+		goto retry_reclaim;
+	css_put(&memcg->css);
+}
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		      unsigned int nr_pages)
 {
@@ -2825,16 +2857,18 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	 * reclaim, the cost of mismatch is negligible.
 	 */
 	do {
-		bool mem_high, swap_high;
+		bool mem_high, swap_high, file_high;
 
 		mem_high = page_counter_read(&memcg->memory) >
 			READ_ONCE(memcg->memory.high);
 		swap_high = page_counter_read(&memcg->swap) >
 			READ_ONCE(memcg->swap.high);
+		file_high = memcg_page_state(memcg, NR_FILE_PAGES) >
+			READ_ONCE(memcg->file_high);
 
 		/* Don't bother a random interrupted task */
 		if (in_interrupt()) {
-			if (mem_high) {
+			if (mem_high || file_high) {
 				schedule_work(&memcg->high_work);
 				break;
 			}
@@ -2855,7 +2889,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		}
 #endif
 
-		if (mem_high || swap_high) {
+		if (mem_high || swap_high || file_high) {
 			/*
 			 * The allocating tasks in this cgroup will need to do
 			 * reclaim or be throttled to prevent further growth
@@ -5660,6 +5694,31 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
+static int memory_file_high_show(struct seq_file *m, void *v)
+{
+	return seq_puts_memcg_tunable(m,
+		READ_ONCE(mem_cgroup_from_seq(m)->file_high));
+}
+
+static ssize_t memory_file_high_write(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long file_high;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "max", &file_high);
+	if (err)
+		return err;
+
+	WRITE_ONCE(memcg->file_high, file_high);
+	memcg_file_limit(memcg);
+
+	return nbytes;
+}
+
+
 #ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS
 static void __memcg_events_show(struct seq_file *m, atomic_long_t *events)
 {
@@ -6214,6 +6273,12 @@ static struct cftype mem_cgroup_legacy_files[] = {
 		.seq_show = memory_high_show,
 		.write = memory_high_write,
 	},
+	{
+		.name = "file_high",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_file_high_show,
+		.write = memory_file_high_write,
+	},
 	{
 		.name = "events",
 		.flags = CFTYPE_NOT_ON_ROOT,
@@ -6497,6 +6562,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		return ERR_CAST(memcg);
 
 	page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
+	memcg->file_high = PAGE_COUNTER_MAX;
 	memcg->soft_limit = PAGE_COUNTER_MAX;
 #ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS
 	memcg->high_async_ratio = HIGH_ASYNC_RATIO_BASE;