hulk inclusion
category: feature
bugzilla: NA
--------------------------------
Add memory.file_high for memcg v1. It is similar to memory.high, but
applies only to page cache: when the page cache usage (NR_FILE_PAGES)
of a memcg exceeds memory.file_high, reclaimable page cache is
reclaimed back towards the limit.
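
For example (illustrative; the path assumes the v1 memory controller
is mounted at /sys/fs/cgroup/memory):

  # Cap the group's page cache at 1G; writing "max" removes the limit.
  echo 1G > /sys/fs/cgroup/memory/<group>/memory.file_high
  cat /sys/fs/cgroup/memory/<group>/memory.file_high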
Signed-off-by: Lu Jialin <lujialin4(a)huawei.com>
---
include/linux/memcontrol.h | 3 +-
include/linux/tracehook.h | 1 +
mm/memcontrol.c | 72 ++++++++++++++++++++++++++++++++++++--
3 files changed, 72 insertions(+), 4 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6b63b39cc24d..017865199e83 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -406,7 +406,7 @@ struct mem_cgroup {
KABI_RESERVE(3)
KABI_RESERVE(4)
#endif
- KABI_RESERVE(5)
+ KABI_USE(5, unsigned long file_high)
#if defined(CONFIG_DYNAMIC_HUGETLB) && defined(CONFIG_ARM64)
KABI_USE(6, struct dhugetlb_pool *hpool)
#else
@@ -1017,6 +1017,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
}
void mem_cgroup_handle_over_high(void);
+void mem_cgroup_handle_over_file_high(void);
 unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
@@ -1452,6 +1453,10 @@ static inline void mem_cgroup_handle_over_high(void)
 {
 }
 
+static inline void mem_cgroup_handle_over_file_high(void)
+{
+}
+
 static inline void mem_cgroup_enter_user_fault(void)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 8280243be3c8..7f945b76c306 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -195,6 +195,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
#endif
mem_cgroup_handle_over_high();
+ mem_cgroup_handle_over_file_high();
blkcg_maybe_throttle_current();
#ifdef CONFIG_QOS_SCHED
sched_qos_offline_wait();
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index aefaa33147b1..8d4d9c4ee8e5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -736,6 +736,24 @@ static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
return x;
}
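+/*
+ * Reclaim page cache from @memcg and each ancestor (excluding the root)
+ * whose NR_FILE_PAGES usage exceeds its memory.file_high. Returns the
+ * total number of pages reclaimed.
+ */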
+static unsigned long memcg_file_limit(struct mem_cgroup *memcg)
+{
+ unsigned long nr_pages, file_high;
+ unsigned long nr_reclaimed = 0;
+
+ mem_cgroup_flush_stats();
+ do {
+ nr_pages = memcg_page_state(memcg, NR_FILE_PAGES);
+ file_high = READ_ONCE(memcg->file_high);
+		if (nr_pages > file_high)
+			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg,
+					nr_pages - file_high, GFP_KERNEL, false);
+ } while ((memcg = parent_mem_cgroup(memcg)) &&
+ !mem_cgroup_is_root(memcg));
+
+ return nr_reclaimed;
+}
+
/* idx can be of type enum memcg_stat_item or node_stat_item. */
static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
{
@@ -2441,6 +2459,7 @@ static void high_work_func(struct work_struct *work)
else
#endif
reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
+ memcg_file_limit(memcg);
}
/*
@@ -2666,6 +2685,19 @@ void mem_cgroup_handle_over_high(void)
css_put(&memcg->css);
}
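+/*
+ * Called on return to userspace (see tracehook_notify_resume()) to push
+ * the current task's memcg back under memory.file_high.
+ */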
+void mem_cgroup_handle_over_file_high(void)
+{
+	struct mem_cgroup *memcg;
+	int nr_retries = MAX_RECLAIM_RETRIES;
+	unsigned long nr_reclaimed;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	memcg = get_mem_cgroup_from_mm(current->mm);
+retry_reclaim:
+ nr_reclaimed = memcg_file_limit(memcg);
+	if (nr_reclaimed && nr_retries--)
+		goto retry_reclaim;
+ css_put(&memcg->css);
+}
static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages)
{
@@ -2825,16 +2857,18 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
* reclaim, the cost of mismatch is negligible.
*/
do {
- bool mem_high, swap_high;
+ bool mem_high, swap_high, file_high;
mem_high = page_counter_read(&memcg->memory) >
READ_ONCE(memcg->memory.high);
swap_high = page_counter_read(&memcg->swap) >
READ_ONCE(memcg->swap.high);
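+		/* Is this level's page cache above its memory.file_high? */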
+ file_high = memcg_page_state(memcg, NR_FILE_PAGES) >
+ READ_ONCE(memcg->file_high);
/* Don't bother a random interrupted task */
if (in_interrupt()) {
- if (mem_high) {
+ if (mem_high || file_high) {
schedule_work(&memcg->high_work);
break;
}
@@ -2855,7 +2889,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
}
#endif
- if (mem_high || swap_high) {
+ if (mem_high || swap_high || file_high) {
/*
* The allocating tasks in this cgroup will need to do
* reclaim or be throttled to prevent further growth
@@ -5660,6 +5694,31 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
return nbytes;
}
+static int memory_file_high_show(struct seq_file *m, void *v)
+{
+ return seq_puts_memcg_tunable(m,
+ READ_ONCE(mem_cgroup_from_seq(m)->file_high));
+}
+
+static ssize_t memory_file_high_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long file_high;
+ int err;
+
+ buf = strstrip(buf);
+ err = page_counter_memparse(buf, "max", &file_high);
+ if (err)
+ return err;
+
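+	/* Apply the new limit, then immediately reclaim any excess. */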
+ WRITE_ONCE(memcg->file_high, file_high);
+ memcg_file_limit(memcg);
+
+ return nbytes;
+}
+
#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS
static void __memcg_events_show(struct seq_file *m, atomic_long_t *events)
{
@@ -6214,6 +6273,12 @@ static struct cftype mem_cgroup_legacy_files[] = {
.seq_show = memory_high_show,
.write = memory_high_write,
},
+ {
+ .name = "file_high",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_file_high_show,
+ .write = memory_file_high_write,
+ },
{
.name = "events",
.flags = CFTYPE_NOT_ON_ROOT,
@@ -6497,6 +6562,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
return ERR_CAST(memcg);
page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
+ memcg->file_high = PAGE_COUNTER_MAX;
memcg->soft_limit = PAGE_COUNTER_MAX;
#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS
memcg->high_async_ratio = HIGH_ASYNC_RATIO_BASE;
--
2.34.1