From: Chen Ridong <chenridong@huawei.com> mainline inclusion from mainline-v6.19 commit 99a2ef500906138ba58093b9893972a5c303c734 category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- An UAF issue was observed: BUG: KASAN: slab-use-after-free in page_counter_uncharge+0x65/0x150 Write of size 8 at addr ffff888106715440 by task insmod/527 CPU: 4 UID: 0 PID: 527 Comm: insmod 6.19.0-rc7-next-20260129+ #11 Tainted: [O]=OOT_MODULE Call Trace: <TASK> dump_stack_lvl+0x82/0xd0 kasan_report+0xca/0x100 kasan_check_range+0x39/0x1c0 page_counter_uncharge+0x65/0x150 dmem_cgroup_uncharge+0x1f/0x260 Allocated by task 527: Freed by task 0: The buggy address belongs to the object at ffff888106715400 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 64 bytes inside of freed 512-byte region [ffff888106715400, ffff888106715600) The buggy address belongs to the physical page: Memory state around the buggy address: ffff888106715300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888106715380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff888106715400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888106715480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888106715500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
The issue occurs because a pool can still be held by a caller after its associated memory region is unregistered. The current implementation frees the pool even if users still hold references to it (e.g., before uncharge operations complete). This patch adds a reference counter to each pool, ensuring that a pool is only freed when its reference count drops to zero. Fixes: b168ed458dde ("kernel/cgroup: Add "dmem" memory accounting cgroup") Cc: stable@vger.kernel.org # v6.14+ Signed-off-by: Chen Ridong <chenridong@huawei.com> Signed-off-by: Tejun Heo <tj@kernel.org> Conflicts: kernel/cgroup/dmem.c [Context conflicts] Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/cgroup/dmem.c | 60 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index e6a20de91332..4cc33e4d8257 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -14,6 +14,7 @@ #include <linux/mutex.h> #include <linux/page_counter.h> #include <linux/parser.h> +#include <linux/refcount.h> #include <linux/slab.h> struct dmem_cgroup_region { @@ -70,7 +71,9 @@ struct dmem_cgroup_pool_state { struct rcu_head rcu; struct page_counter cnt; + struct dmem_cgroup_pool_state *parent; + refcount_t ref; bool inited; }; @@ -87,6 +90,9 @@ struct dmem_cgroup_pool_state { static DEFINE_SPINLOCK(dmemcg_lock); static LIST_HEAD(dmem_cgroup_regions); +static void dmemcg_free_region(struct kref *ref); +static void dmemcg_pool_free_rcu(struct rcu_head *rcu); + static inline struct dmemcg_state * css_to_dmemcs(struct cgroup_subsys_state *css) { @@ -103,10 +109,38 @@ static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg) return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL; } +static void dmemcg_pool_get(struct dmem_cgroup_pool_state *pool) +{ + refcount_inc(&pool->ref); +} + +static bool dmemcg_pool_tryget(struct dmem_cgroup_pool_state *pool) +{ + return refcount_inc_not_zero(&pool->ref); +} + +static void dmemcg_pool_put(struct dmem_cgroup_pool_state *pool) +{ + if (!refcount_dec_and_test(&pool->ref)) + return; + + call_rcu(&pool->rcu, dmemcg_pool_free_rcu); +} + +static void dmemcg_pool_free_rcu(struct rcu_head *rcu) +{ + struct dmem_cgroup_pool_state *pool = container_of(rcu, typeof(*pool), rcu); + + if (pool->parent) + dmemcg_pool_put(pool->parent); + kref_put(&pool->region->ref, dmemcg_free_region); + kfree(pool); +} + static void free_cg_pool(struct dmem_cgroup_pool_state *pool) { list_del(&pool->region_node); - kfree(pool); + dmemcg_pool_put(pool); } static void @@ -341,6 +375,12 @@ alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region page_counter_init(&pool->cnt, ppool ? &ppool->cnt : NULL); reset_all_resource_limits(pool); + refcount_set(&pool->ref, 1); + kref_get(®ion->ref); + if (ppool && !pool->parent) { + pool->parent = ppool; + dmemcg_pool_get(ppool); + } list_add_tail_rcu(&pool->css_node, &dmemcs->pools); list_add_tail(&pool->region_node, ®ion->pools); @@ -388,6 +428,10 @@ get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *regio /* Fix up parent links, mark as inited. */ pool->cnt.parent = &ppool->cnt; + if (ppool && !pool->parent) { + pool->parent = ppool; + dmemcg_pool_get(ppool); + } pool->inited = true; pool = ppool; @@ -434,6 +478,8 @@ void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region) list_for_each_entry_safe(pool, next, ®ion->pools, region_node) { list_del_rcu(&pool->css_node); + list_del(&pool->region_node); + dmemcg_pool_put(pool); } /* @@ -514,8 +560,10 @@ static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name) */ void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool) { - if (pool) + if (pool) { css_put(&pool->cs->css); + dmemcg_pool_put(pool); + } } EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put); @@ -529,6 +577,8 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region) pool = find_cg_pool_locked(cg, region); if (pool && !READ_ONCE(pool->inited)) pool = NULL; + if (pool && !dmemcg_pool_tryget(pool)) + pool = NULL; rcu_read_unlock(); while (!pool) { @@ -537,6 +587,8 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region) pool = get_cg_pool_locked(cg, region, &allocpool); else pool = ERR_PTR(-ENODEV); + if (!IS_ERR(pool)) + dmemcg_pool_get(pool); spin_unlock(&dmemcg_lock); if (pool == ERR_PTR(-ENOMEM)) { @@ -572,6 +624,7 @@ void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size) page_counter_uncharge(&pool->cnt, size); css_put(&pool->cs->css); + dmemcg_pool_put(pool); } EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge); @@ -623,7 +676,9 @@ int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, if (ret_limit_pool) { *ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt); css_get(&(*ret_limit_pool)->cs->css); + dmemcg_pool_get(*ret_limit_pool); } + dmemcg_pool_put(pool); ret = -EAGAIN; goto err; } @@ -718,6 +773,7 @@ static ssize_t dmemcg_limit_write(struct kernfs_open_file *of, /* And commit */ apply(pool, new_limit); + dmemcg_pool_put(pool); out_put: kref_put(®ion->ref, dmemcg_free_region); -- 2.34.1