From: Roman Gushchin guro@fb.com
mainline inclusion from mainline-v5.3-rc5 commit ec9f02384f6053f2a5417e82b65078edc5364a8d category: bugfix bugzilla: 34611 CVE: NA
-------------------------------------------------
Memcg counters for shadow nodes are broken because the memcg pointer is obtained in a wrong way. The following approach is used: virt_to_page(xa_node)->mem_cgroup
Since commit 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages") page->mem_cgroup pointer isn't set for slab pages, so memcg_from_slab_page() should be used instead.
Also I doubt that it ever worked correctly: virt_to_head_page() should be used instead of virt_to_page(). Otherwise objects residing on tail pages are not accounted, because only the head page contains a valid mem_cgroup pointer. That was a case since the introduction of these counters by the commit 68d48e6a2df5 ("mm: workingset: add vmstat counter for shadow nodes").
Link: http://lkml.kernel.org/r/20190801233532.138743-1-guro@fb.com Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages") Signed-off-by: Roman Gushchin guro@fb.com Acked-by: Johannes Weiner hannes@cmpxchg.org Cc: Vladimir Davydov vdavydov.dev@gmail.com Cc: Shakeel Butt shakeelb@google.com Cc: Michal Hocko mhocko@suse.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/memcontrol.h | 19 +++++++++++++++++++ mm/memcontrol.c | 20 ++++++++++++++++++++ mm/workingset.c | 2 +- 3 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8c7b83cee2d7..5559d2ac442b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -676,6 +676,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); +void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) @@ -1077,6 +1078,14 @@ static inline void mod_lruvec_page_state(struct page *page, mod_node_page_state(page_pgdat(page), idx, val); }
+static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, + int val) +{ + struct page *page = virt_to_head_page(p); + + __mod_node_page_state(page_pgdat(page), idx, val); +} + static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1158,6 +1167,16 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); }
+static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx) +{ + __mod_lruvec_slab_state(p, idx, 1); +} + +static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx) +{ + __mod_lruvec_slab_state(p, idx, -1); +} + /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void inc_memcg_state(struct mem_cgroup *memcg, int idx) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index be50582ae13e..74722218768a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -776,6 +776,26 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); }
+void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) +{ + struct page *page = virt_to_head_page(p); + pg_data_t *pgdat = page_pgdat(page); + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + rcu_read_lock(); + memcg = memcg_from_slab_page(page); + + /* Untracked pages have no memcg, no lruvec. Update only the node */ + if (!memcg || memcg == root_mem_cgroup) { + __mod_node_page_state(pgdat, idx, val); + } else { + lruvec = mem_cgroup_lruvec(pgdat, memcg); + __mod_lruvec_state(lruvec, idx, val); + } + rcu_read_unlock(); +} + /** * __count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup diff --git a/mm/workingset.c b/mm/workingset.c index 15b361bf6da6..d422f396ab64 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -502,7 +502,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, } if (WARN_ON_ONCE(node->exceptional)) goto out_invalid; - inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); + __inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->i_pages, node, workingset_lookup_update(mapping));