From: Roman Gushchin <guro@fb.com>
mainline inclusion
from mainline-5.3-rc1
commit 4d96ba3530750fae3f3f01150adfecde96157815
category: bugfix
bugzilla: 34611
CVE: NA
-------------------------------------------------

Every slab page charged to a non-root memory cgroup has a pointer to the memory cgroup and holds a reference to it, which protects a non-empty memory cgroup from being released. At the same time the page has a pointer to the corresponding kmem_cache, and also holds a reference to the kmem_cache. And the kmem_cache by itself holds a reference to the cgroup.
So there is clearly some redundancy, which allows us to stop setting the page->mem_cgroup pointer and instead get the memcg pointer indirectly via the kmem_cache. Further, it will allow this pointer to be changed more easily, without the need to go over all charged pages.
So let's stop setting the page->mem_cgroup pointer for slab pages, and stop using the css refcounter directly to protect the memory cgroup from going away. Instead, rely on the kmem_cache as an intermediate object.
Make sure that vmstats and shrinker lists keep working as before, as well as the /proc/kpagecgroup interface.
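To make the new pointer chain concrete, here is a small user-space sketch (not kernel code; the struct layouts are toy stand-ins that only mirror the fields this patch relies on, and memcg_of() is a hypothetical name standing in for the memcg_from_slab_page() helper added below). It shows how the memcg of a slab page is now reached through page->slab_cache instead of page->mem_cgroup:

  #include <stdio.h>

  /* toy stand-ins for the kernel objects involved in this patch */
  struct mem_cgroup { const char *name; };
  struct memcg_params { struct mem_cgroup *memcg; int is_root; };
  struct kmem_cache { struct memcg_params memcg_params; };
  struct page { struct kmem_cache *slab_cache; };	/* head slab page */

  /* mirrors memcg_from_slab_page(): page -> kmem_cache -> memcg */
  static struct mem_cgroup *memcg_of(struct page *page)
  {
  	struct kmem_cache *s = page->slab_cache;

  	if (s && !s->memcg_params.is_root)
  		return s->memcg_params.memcg;
  	return NULL;	/* root-cache pages are not accounted to a memcg */
  }

  int main(void)
  {
  	struct mem_cgroup cg = { "test-cgroup" };
  	struct kmem_cache cache = { { &cg, 0 } };
  	struct page pg = { &cache };

  	printf("memcg: %s\n", memcg_of(&pg)->name);
  	return 0;
  }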
Link: http://lkml.kernel.org/r/20190611231813.3148843-10-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Waiman Long <longman@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 4d96ba3530750fae3f3f01150adfecde96157815)
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/list_lru.c   |  3 +-
 mm/memcontrol.c | 12 ++++----
 mm/slab.h       | 74 ++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 70 insertions(+), 19 deletions(-)
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 758653dd1443..19828ba88c48 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/memcontrol.h>
+#include "slab.h"
 
 #ifdef CONFIG_MEMCG_KMEM
 static LIST_HEAD(list_lrus);
@@ -62,7 +63,7 @@ static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
 	if (!memcg_kmem_enabled())
 		return NULL;
 	page = virt_to_head_page(ptr);
-	return page->mem_cgroup;
+	return memcg_from_slab_page(page);
 }
 
 static inline struct list_lru_one *
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 94cba70d98d1..2a1e51175e91 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -495,7 +495,10 @@ ino_t page_cgroup_ino(struct page *page)
 	unsigned long ino = 0;
 
 	rcu_read_lock();
-	memcg = READ_ONCE(page->mem_cgroup);
+	if (PageHead(page) && PageSlab(page))
+		memcg = memcg_from_slab_page(page);
+	else
+		memcg = READ_ONCE(page->mem_cgroup);
 	while (memcg && !(memcg->css.flags & CSS_ONLINE))
 		memcg = parent_mem_cgroup(memcg);
 	if (memcg)
@@ -2706,9 +2709,6 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 		cancel_charge(memcg, nr_pages);
 		return -ENOMEM;
 	}
-
-	page->mem_cgroup = memcg;
-
 	return 0;
 }
 
@@ -2734,8 +2734,10 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 		goto out;
 
 		ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
-		if (!ret)
+		if (!ret) {
+			page->mem_cgroup = memcg;
 			__SetPageKmemcg(page);
+		}
 	}
 
 out:
diff --git a/mm/slab.h b/mm/slab.h
index 16049a33dffa..b26e30b88f08 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -255,30 +255,67 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 	return s->memcg_params.root_cache;
 }
 
+/*
+ * Expects a pointer to a slab page. Please note, that PageSlab() check
+ * isn't sufficient, as it returns true also for tail compound slab pages,
+ * which do not have slab_cache pointer set.
+ * So this function assumes that the page can pass PageHead() and PageSlab()
+ * checks.
+ */
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+	struct kmem_cache *s;
+
+	s = READ_ONCE(page->slab_cache);
+	if (s && !is_root_cache(s))
+		return s->memcg_params.memcg;
+
+	return NULL;
+}
+
+/*
+ * Charge the slab page belonging to the non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline int memcg_charge_slab(struct page *page, gfp_t gfp,
 					     int order, struct kmem_cache *s)
 {
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
 	int ret;
 
-	if (is_root_cache(s))
-		return 0;
-
-	ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
+	memcg = s->memcg_params.memcg;
+	ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
 	if (ret)
 		return ret;
 
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+	mod_lruvec_state(lruvec, cache_vmstat_idx(s), 1 << order);
+
+	/* transfer try_charge() page references to kmem_cache */
 	percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
+	css_put_many(&memcg->css, 1 << order);
 
 	return 0;
 }
 
+/*
+ * Uncharge a slab page belonging to a non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline void memcg_uncharge_slab(struct page *page, int order,
 						struct kmem_cache *s)
 {
-	if (!is_root_cache(s))
-		percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
-	memcg_kmem_uncharge(page, order);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	memcg = s->memcg_params.memcg;
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+	mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order));
+	memcg_kmem_uncharge_memcg(page, order, memcg);
+
+	percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
 }
 
 extern void slab_init_memcg_params(struct kmem_cache *);
@@ -314,6 +351,11 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 	return s;
 }
 
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+	return NULL;
+}
+
 static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order,
 				    struct kmem_cache *s)
 {
@@ -340,18 +382,24 @@ static __always_inline int charge_slab_page(struct page *page,
 					    gfp_t gfp, int order,
 					    struct kmem_cache *s)
 {
-	int ret = memcg_charge_slab(page, gfp, order, s);
-
-	if (!ret)
-		mod_lruvec_page_state(page, cache_vmstat_idx(s), 1 << order);
+	if (is_root_cache(s)) {
+		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+				    1 << order);
+		return 0;
+	}
 
-	return ret;
+	return memcg_charge_slab(page, gfp, order, s);
 }
 
 static __always_inline void uncharge_slab_page(struct page *page, int order,
 					       struct kmem_cache *s)
 {
-	mod_lruvec_page_state(page, cache_vmstat_idx(s), -(1 << order));
+	if (is_root_cache(s)) {
+		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+				    -(1 << order));
+		return;
+	}
+
 	memcg_uncharge_slab(page, order, s);
 }