From: Roman Gushchin <guro@fb.com>
mainline inclusion
from mainline-5.3-rc1
commit 434866947564b954409c2fe561605e22f7b49f64
category: bugfix
bugzilla: 34611
CVE: NA
-------------------------------------------------
Currently SLUB uses a work item scheduled after an RCU grace period to deactivate a non-root kmem_cache. This mechanism can be reused for releasing kmem_caches, but it first has to be generalized to cover the SLAB case.
Introduce the kmemcg_cache_deactivate() function, which calls the allocator-specific __kmemcg_cache_deactivate() and schedules execution of __kmemcg_cache_deactivate_after_rcu() with all necessary locks held, in a worker context, after an RCU grace period.
Here is the new calling scheme:
  kmemcg_cache_deactivate()
    __kmemcg_cache_deactivate()                  SLAB/SLUB-specific
    kmemcg_rcufn()                               rcu
      kmemcg_workfn()                            work
        __kmemcg_cache_deactivate_after_rcu()    SLAB/SLUB-specific
instead of:
  __kmemcg_cache_deactivate()                    SLAB/SLUB-specific
    slab_deactivate_memcg_cache_rcu_sched()      SLUB-only
      kmemcg_rcufn()                             rcu
        kmemcg_workfn()                          work
          kmemcg_cache_deact_after_rcu()         SLUB-only
For consistency, all allocator-specific functions start with "__".
Link: http://lkml.kernel.org/r/20190611231813.3148843-4-guro@fb.com Signed-off-by: Roman Gushchin guro@fb.com Acked-by: Vladimir Davydov vdavydov.dev@gmail.com Reviewed-by: Shakeel Butt shakeelb@google.com Cc: Christoph Lameter cl@linux.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Michal Hocko mhocko@suse.com Cc: Waiman Long longman@redhat.com Cc: David Rientjes rientjes@google.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Pekka Enberg penberg@kernel.org Cc: Andrei Vagin avagin@gmail.com Cc: Qian Cai cai@lca.pw Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 434866947564b954409c2fe561605e22f7b49f64) Signed-off-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/slab.c | 4 ++++ mm/slab.h | 3 +-- mm/slab_common.c | 27 ++++++++------------------- mm/slub.c | 8 +------- 4 files changed, 14 insertions(+), 28 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c index d876c379d966..a04e81dbbcdb 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2329,6 +2329,10 @@ void __kmemcg_cache_deactivate(struct kmem_cache *cachep) { __kmem_cache_shrink(cachep); } + +void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s) +{ +} #endif
int __kmem_cache_shutdown(struct kmem_cache *cachep) diff --git a/mm/slab.h b/mm/slab.h index a1b72f757c6c..0875314b1210 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -172,6 +172,7 @@ int __kmem_cache_shutdown(struct kmem_cache *); void __kmem_cache_release(struct kmem_cache *); int __kmem_cache_shrink(struct kmem_cache *); void __kmemcg_cache_deactivate(struct kmem_cache *s); +void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s); void slab_kmem_cache_release(struct kmem_cache *);
struct seq_file; @@ -290,8 +291,6 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
extern void slab_init_memcg_params(struct kmem_cache *); extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg); -extern void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, - void (*work_fn)(struct kmem_cache *));
#else /* CONFIG_MEMCG_KMEM */
diff --git a/mm/slab_common.c b/mm/slab_common.c index 97d63c6ab2be..318d2527bc0b 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -690,7 +690,7 @@ static void kmemcg_workfn(struct work_struct *work) put_online_mems(); put_online_cpus();
- /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */ + /* done, put the ref from kmemcg_cache_deactivate() */ css_put(&s->memcg_params.memcg->css); }
@@ -708,24 +708,14 @@ static void kmemcg_rcufn(struct rcu_head *head) queue_work(memcg_kmem_cache_wq, &s->memcg_params.work); }
-/** - * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a - * sched RCU grace period - * @s: target kmem_cache - * @work_fn: deactivation function to call - * - * Schedule @work_fn to be invoked with online cpus, mems and slab_mutex - * held after a sched RCU grace period. The slab is guaranteed to stay - * alive until @work_fn is finished. This is to be used from - * __kmemcg_cache_deactivate(). - */ -void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, - void (*work_fn)(struct kmem_cache *)) +static void kmemcg_cache_deactivate(struct kmem_cache *s) { if (WARN_ON_ONCE(is_root_cache(s)) || WARN_ON_ONCE(s->memcg_params.work_fn)) return;
+ __kmemcg_cache_deactivate(s); + /* * memcg_kmem_wq_lock is used to synchronize memcg_params.dying * flag and make sure that no new kmem_cache deactivation tasks @@ -738,7 +728,7 @@ void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, /* pin memcg so that @s doesn't get destroyed in the middle */ css_get(&s->memcg_params.memcg->css);
- s->memcg_params.work_fn = work_fn; + s->memcg_params.work_fn = __kmemcg_cache_deactivate_after_rcu; call_rcu(&s->memcg_params.rcu_head, kmemcg_rcufn); unlock: spin_unlock_irq(&memcg_kmem_wq_lock); @@ -763,7 +753,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) if (!c) continue;
- __kmemcg_cache_deactivate(c); + kmemcg_cache_deactivate(c); arr->entries[idx] = NULL; } mutex_unlock(&slab_mutex); @@ -859,11 +849,10 @@ static void memcg_set_kmem_cache_dying(struct kmem_cache *s) static void flush_memcg_workqueue(struct kmem_cache *s) { /* - * SLUB deactivates the kmem_caches through call_rcu. Make + * SLAB and SLUB deactivate the kmem_caches through call_rcu. Make * sure all registered rcu callbacks have been invoked. */ - if (IS_ENABLED(CONFIG_SLUB)) - rcu_barrier(); + rcu_barrier();
/* * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB diff --git a/mm/slub.c b/mm/slub.c index 8c3de980ee07..4104c266580a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4025,7 +4025,7 @@ int __kmem_cache_shrink(struct kmem_cache *s) }
#ifdef CONFIG_MEMCG -static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s) +void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s) { /* * Called with all the locks held after a sched RCU grace period. @@ -4051,12 +4051,6 @@ void __kmemcg_cache_deactivate(struct kmem_cache *s) */ slub_set_cpu_partial(s, 0); s->min_partial = 0; - - /* - * s->cpu_partial is checked locklessly (see put_cpu_partial), so - * we have to make sure the change is visible before shrinking. - */ - slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu); } #endif /* CONFIG_MEMCG */