From: Roman Gushchin guro@fb.com
mainline inclusion from mainline-v5.11-rc1 commit 270c6a71460e12b07b1dcadf7457ff95b6c6e8f4 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4C0GB CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
To gather all direct accesses to struct page's memcg_data field in one place, let's introduce 3 new helpers to use in the slab accounting code:
struct obj_cgroup **page_objcgs(struct page *page); struct obj_cgroup **page_objcgs_check(struct page *page); bool set_page_objcgs(struct page *page, struct obj_cgroup **objcgs);
They are similar to the corresponding API for generic pages, except that the setter can return false, indicating that the value has been already set from a different thread.
Signed-off-by: Roman Gushchin guro@fb.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Alexei Starovoitov ast@kernel.org Reviewed-by: Shakeel Butt shakeelb@google.com Acked-by: Johannes Weiner hannes@cmpxchg.org Link: https://lkml.kernel.org/r/20201027001657.3398190-3-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-3-guro@fb.com Signed-off-by: Chen Huang chenhuang5@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/memcontrol.h | 64 ++++++++++++++++++++++++++++++++++++++ mm/memcontrol.c | 6 ++-- mm/slab.h | 35 +++++---------------- 3 files changed, 75 insertions(+), 30 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c23a842f4d80..05dcef20927c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -446,6 +446,70 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)memcg_data; }
+#ifdef CONFIG_MEMCG_KMEM +/* + * page_objcgs - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function assumes that the page is known to have an + * associated object cgroups vector. It's not safe to call this function + * against pages, which might have an associated memory cgroup: e.g. + * kernel stack pages. + */ +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return (struct obj_cgroup **)(READ_ONCE(page->memcg_data) & ~0x1UL); +} + +/* + * page_objcgs_check - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function is safe to use if the page can be directly associated + * with a memory cgroup. + */ +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + if (memcg_data && (memcg_data & 0x1UL)) + return (struct obj_cgroup **)(memcg_data & ~0x1UL); + + return NULL; +} + +/* + * set_page_objcgs - associate a page with a object cgroups vector + * @page: a pointer to the page struct + * @objcgs: a pointer to the object cgroups vector + * + * Atomically associates a page with a vector of object cgroups. + */ +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL); +} +#else +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return NULL; +} + +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + return NULL; +} + +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return true; +} +#endif + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d6073bfd4a87..9eb2301dd6c5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2864,7 +2864,7 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, if (!vec) return -ENOMEM;
- if (cmpxchg(&page->memcg_data, 0, (unsigned long)vec | 0x1UL)) + if (!set_page_objcgs(page, vec)) kfree(vec); else kmemleak_not_leak(vec); @@ -2898,12 +2898,12 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) * Memcg membership data for each individual object is saved in * the page->obj_cgroups. */ - if (page_has_obj_cgroups(page)) { + if (page_objcgs_check(page)) { struct obj_cgroup *objcg; unsigned int off;
off = obj_to_index(page->slab_cache, page, p); - objcg = page_obj_cgroups(page)[off]; + objcg = page_objcgs(page)[off]; if (objcg) return obj_cgroup_memcg(objcg);
diff --git a/mm/slab.h b/mm/slab.h index ac43829b73d4..571757eb4a8f 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -237,28 +237,12 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla }
#ifdef CONFIG_MEMCG_KMEM -static inline struct obj_cgroup **page_obj_cgroups(struct page *page) -{ - /* - * Page's memory cgroup and obj_cgroups vector are sharing the same - * space. To distinguish between them in case we don't know for sure - * that the page is a slab page (e.g. page_cgroup_ino()), let's - * always set the lowest bit of obj_cgroups. - */ - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL); -} - -static inline bool page_has_obj_cgroups(struct page *page) -{ - return page->memcg_data & 0x1UL; -} - int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, gfp_t gfp);
static inline void memcg_free_page_obj_cgroups(struct page *page) { - kfree(page_obj_cgroups(page)); + kfree(page_objcgs(page)); page->memcg_data = 0; }
@@ -329,7 +313,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, if (likely(p[i])) { page = virt_to_head_page(p[i]);
- if (!page_has_obj_cgroups(page) && + if (!page_objcgs(page) && memcg_alloc_page_obj_cgroups(page, s, flags)) { obj_cgroup_uncharge(objcg, obj_full_size(s)); continue; @@ -337,7 +321,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
off = obj_to_index(s, page, p[i]); obj_cgroup_get(objcg); - page_obj_cgroups(page)[off] = objcg; + page_objcgs(page)[off] = objcg; mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), obj_full_size(s)); } else { @@ -351,6 +335,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, void **p, int objects) { struct kmem_cache *s; + struct obj_cgroup **objcgs; struct obj_cgroup *objcg; struct page *page; unsigned int off; @@ -364,7 +349,8 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, continue;
page = virt_to_head_page(p[i]); - if (!page_has_obj_cgroups(page)) + objcgs = page_objcgs(page); + if (!objcgs) continue;
if (!s_orig) @@ -373,11 +359,11 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, s = s_orig;
off = obj_to_index(s, page, p[i]); - objcg = page_obj_cgroups(page)[off]; + objcg = objcgs[off]; if (!objcg) continue;
- page_obj_cgroups(page)[off] = NULL; + objcgs[off] = NULL; obj_cgroup_uncharge(objcg, obj_full_size(s)); mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), -obj_full_size(s)); @@ -386,11 +372,6 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, }
#else /* CONFIG_MEMCG_KMEM */ -static inline bool page_has_obj_cgroups(struct page *page) -{ - return false; -} - static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr) { return NULL;