From: Roman Gushchin guro@fb.com
mainline inclusion from mainline-v5.11-rc1 commit 18b2db3b0385226b71cb3288474fa5a6e4a45474 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4C0GB CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
PageKmemcg flag is currently defined as a page type (like buddy, offline, table and guard). Semantically it means that the page was accounted as a kernel memory by the page allocator and has to be uncharged on the release.
As a side effect of defining the flag as a page type, the accounted page can't be mapped to userspace (look at page_has_type() and comments above). In particular, this blocks the accounting of vmalloc-backed memory used by some bpf maps, because these maps do map the memory to userspace.
One option is to fix it by complicating the access to page->mapcount, which provides some free bits for page->page_type.
But it's way better to move this flag into page->memcg_data flags. Indeed, the flag makes no sense without enabled memory cgroups and memory cgroup pointer set in particular.
This commit replaces PageKmemcg() and __SetPageKmemcg() with PageMemcgKmem() and an open-coded OR operation setting the memcg pointer with the MEMCG_DATA_KMEM bit. __ClearPageKmemcg() can be simple deleted, as the whole memcg_data is zeroed at once.
As a bonus, on !CONFIG_MEMCG build the PageMemcgKmem() check will be compiled out.
Signed-off-by: Roman Gushchin guro@fb.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Alexei Starovoitov ast@kernel.org Reviewed-by: Shakeel Butt shakeelb@google.com Acked-by: Johannes Weiner hannes@cmpxchg.org Acked-by: Michal Hocko mhocko@suse.com Link: https://lkml.kernel.org/r/20201027001657.3398190-5-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-5-guro@fb.com
Conflicts: mm/memcontrol.c Signed-off-by: Chen Huang chenhuang5@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/memcontrol.h | 37 +++++++++++++++++++++++++++++++++---- include/linux/page-flags.h | 11 ++--------- mm/memcontrol.c | 20 +++++++------------- mm/page_alloc.c | 4 ++-- 4 files changed, 44 insertions(+), 28 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d5424564bc69..ab6ea36744bf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -376,8 +376,10 @@ extern struct mem_cgroup *root_mem_cgroup; enum page_memcg_data_flags { /* page->memcg_data is a pointer to an objcgs vector */ MEMCG_DATA_OBJCGS = (1UL << 0), + /* page has been accounted as a non-slab kernel page */ + MEMCG_DATA_KMEM = (1UL << 1), /* the next bit after the last actual flag */ - __NR_MEMCG_DATA_FLAGS = (1UL << 1), + __NR_MEMCG_DATA_FLAGS = (1UL << 2), };
#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) @@ -399,8 +401,12 @@ enum page_memcg_data_flags { */ static inline struct mem_cgroup *page_memcg(struct page *page) { + unsigned long memcg_data = page->memcg_data; + VM_BUG_ON_PAGE(PageSlab(page), page); - return (struct mem_cgroup *)page->memcg_data; + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); }
/* @@ -417,7 +423,8 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) VM_BUG_ON_PAGE(PageSlab(page), page); WARN_ON_ONCE(!rcu_read_lock_held());
- return (struct mem_cgroup *)READ_ONCE(page->memcg_data); + return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) & + ~MEMCG_DATA_FLAGS_MASK); }
/* @@ -446,7 +453,21 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) if (memcg_data & MEMCG_DATA_OBJCGS) return NULL;
- return (struct mem_cgroup *)memcg_data; + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * PageMemcgKmem - check if the page has MemcgKmem flag set + * @page: a pointer to the page struct + * + * Checks if the page has MemcgKmem flag set. The caller must ensure that + * the page has an associated memory cgroup. It's not safe to call this function + * against some types of pages, e.g. slab pages. + */ +static inline bool PageMemcgKmem(struct page *page) +{ + VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); + return page->memcg_data & MEMCG_DATA_KMEM; }
#ifdef CONFIG_MEMCG_KMEM @@ -465,6 +486,7 @@ static inline struct obj_cgroup **page_objcgs(struct page *page) unsigned long memcg_data = READ_ONCE(page->memcg_data);
VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -484,6 +506,8 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) return NULL;
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); }
@@ -1173,6 +1197,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; }
+static inline bool PageMemcgKmem(struct page *page) +{ + return false; +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return true; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e470b34f94ff..5fed83d29ea8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -710,9 +710,8 @@ PAGEFLAG_FALSE(DoubleMap) #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 -#define PG_kmemcg 0x00000200 -#define PG_table 0x00000400 -#define PG_guard 0x00000800 +#define PG_table 0x00000200 +#define PG_guard 0x00000400
#define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -763,12 +762,6 @@ PAGE_TYPE_OPS(Buddy, buddy) */ PAGE_TYPE_OPS(Offline, offline)
-/* - * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on - * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. - */ -PAGE_TYPE_OPS(Kmemcg, kmemcg) - /* * Marks pages in use as page tables. */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9eb2301dd6c5..ca82afb1bdff 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3054,8 +3054,8 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) if (memcg && !mem_cgroup_is_root(memcg)) { ret = __memcg_kmem_charge(memcg, gfp, 1 << order); if (!ret) { - page->memcg_data = (unsigned long)memcg; - __SetPageKmemcg(page); + page->memcg_data = (unsigned long)memcg | + MEMCG_DATA_KMEM; return 0; } css_put(&memcg->css); @@ -3080,10 +3080,6 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) __memcg_kmem_uncharge(memcg, nr_pages); page->memcg_data = 0; css_put(&memcg->css); - - /* slab pages do not have PageKmemcg flag set */ - if (PageKmemcg(page)) - __ClearPageKmemcg(page); }
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) @@ -3244,8 +3240,8 @@ void split_page_memcg(struct page *head, unsigned int nr)
for (i = 1; i < nr; i++) { head[i].memcg_data = (unsigned long)memcg; - if (PageKmemcg(head)) - __SetPageKmemcg(head + i); + if (PageMemcgKmem(head)) + head[i].memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM; } css_get_many(&memcg->css, nr - 1); } @@ -7044,12 +7040,10 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) nr_pages = compound_nr(page); ug->nr_pages += nr_pages;
- if (!PageKmemcg(page)) { - ug->pgpgout++; - } else { + if (PageMemcgKmem(page)) ug->nr_kmem += nr_pages; - __ClearPageKmemcg(page); - } + else + ug->pgpgout++;
ug->dummy_page = page; page->memcg_data = 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ad4a794b87f5..9f29b6480550 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1218,7 +1218,7 @@ static __always_inline bool free_pages_prepare(struct page *page, * Do not let hwpoison pages hit pcplists/buddy * Untie memcg state and reset page's owner */ - if (memcg_kmem_enabled() && PageKmemcg(page)) + if (memcg_kmem_enabled() && PageMemcgKmem(page)) __memcg_kmem_uncharge_page(page, order); reset_page_owner(page, order); return false; @@ -1248,7 +1248,7 @@ static __always_inline bool free_pages_prepare(struct page *page, } if (PageMappingFlags(page)) page->mapping = NULL; - if (memcg_kmem_enabled() && PageKmemcg(page)) + if (memcg_kmem_enabled() && PageMemcgKmem(page)) __memcg_kmem_uncharge_page(page, order); if (check_free) bad += check_free_page(page);