From: Tejun Heo <tj@kernel.org>
mainline inclusion
from mainline-5.4-rc4
commit 9d179b865449b351ad5cb76dbea480c9170d4a27
category: feature
bugzilla: 38688
CVE: NA
---------------------------
blkcg_activate_policy() has the following bugs.
* cf09a8ee19ad ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()") added @blkcg to ->pd_alloc_fn(); however, blkcg_activate_policy() ends up using pd's allocated for the root blkcg for all preallocations, so ->pd_init_fn() for non-root blkcgs can be passed in pd's which are allocated for the root blkcg.
For blk-iocost, this means that ->pd_init_fn() can write beyond the end of the allocated object as it determines the length of the flex array at the end based on the blkcg's nesting level.
* Each pd is initialized as it is allocated. If an alloc fails, the policy will get freed with pds already initialized on it.
* After the above partial failure, the partial pds are not freed.
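
For illustration, a minimal userspace sketch of the first bug (all names here are hypothetical stand-ins, not the kernel's actual structures; it only models the shape of the problem):

    #include <stdlib.h>

    /* Stand-in for a pd that ends in a flex array sized per blkcg depth. */
    struct pd_like {
            int nr_levels;          /* filled in by "init" from the blkcg depth */
            unsigned long stat[];   /* one slot per ancestor level */
    };

    /* Models ->pd_alloc_fn(): sizes the flex array for @depth. */
    static struct pd_like *pd_alloc(int depth)
    {
            return calloc(1, sizeof(struct pd_like) +
                             depth * sizeof(unsigned long));
    }

    /*
     * Models ->pd_init_fn(): sizes its writes from the blkcg it is handed.
     * When the pd was allocated for the root blkcg (depth 0) but init runs
     * for a nested blkcg (depth > 0), the loop writes past the allocation.
     */
    static void pd_init(struct pd_like *pd, int depth)
    {
            pd->nr_levels = depth;
            for (int i = 0; i < depth; i++)
                    pd->stat[i] = 0;        /* out of bounds when depth > 0 */
    }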
This patch fixes all the above issues by
* Restructuring blkcg_activate_policy() so that the alloc and init passes are separate. Init takes place only after all allocs have succeeded, and on failure all allocated pds are freed (see the sketch after this list).
* Unifying and fixing the cleanup of the remaining pd_prealloc.
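
Reduced to a standalone userspace sketch, the new shape looks roughly like this (hypothetical names; NR_ITEMS stands in for the blkgs hanging off the queue, alloc_pd()/init_pd()/free_pd() for the policy callbacks):

    #include <stdlib.h>

    #define NR_ITEMS 16

    static void *alloc_pd(void)    { return calloc(1, 64); }
    static void  init_pd(void *pd) { (void)pd; /* would set up the pd */ }
    static void  free_pd(void *pd) { free(pd); }

    static int activate(void)
    {
            void *pd[NR_ITEMS] = { NULL };
            int i;

            /* pass 1: allocate everything, initialize nothing */
            for (i = 0; i < NR_ITEMS; i++) {
                    pd[i] = alloc_pd();
                    if (!pd[i])
                            goto enomem;
            }

            /* pass 2: init runs only once every allocation has succeeded */
            for (i = 0; i < NR_ITEMS; i++)
                    init_pd(pd[i]);
            return 0;

    enomem:
            /* nothing was initialized, so bare frees are sufficient */
            for (i = 0; i < NR_ITEMS; i++)
                    free_pd(pd[i]);         /* free(NULL) is a no-op */
            return -1;
    }

Because init never ran on the failure path, the cleanup only has to free memory; it never has to undo initialization side effects.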
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: cf09a8ee19ad ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflict: block/blk-cgroup.c
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/blk-cgroup.c | 68 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 51 insertions(+), 17 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4a8151ecd8a66..20d82681a62ad 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1424,7 +1424,7 @@ int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol)
 {
 	struct blkg_policy_data *pd_prealloc = NULL;
-	struct blkcg_gq *blkg;
+	struct blkcg_gq *blkg, *pinned_blkg = NULL;
 	int ret;
 
 	if (blkcg_policy_enabled(q, pol))
@@ -1434,51 +1434,85 @@ int blkcg_activate_policy(struct request_queue *q,
 		blk_mq_freeze_queue(q);
 	else
 		blk_queue_bypass_start(q);
-pd_prealloc:
-	if (!pd_prealloc) {
-		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
-		if (!pd_prealloc) {
-			ret = -ENOMEM;
-			goto out_bypass_end;
-		}
-	}
 
+retry:
 	spin_lock_irq(q->queue_lock);
 
-	/* blkg_list is pushed at the head, reverse walk to init parents first */
+	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
 	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
 		struct blkg_policy_data *pd;
 
 		if (blkg->pd[pol->plid])
 			continue;
 
-		pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
-		if (!pd)
-			swap(pd, pd_prealloc);
+		/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+		if (blkg == pinned_blkg) {
+			pd = pd_prealloc;
+			pd_prealloc = NULL;
+		} else {
+			pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+					      blkg->blkcg);
+		}
+
 		if (!pd) {
+			/*
+			 * GFP_NOWAIT failed.  Free the existing one and
+			 * prealloc for @blkg w/ GFP_KERNEL.
+			 */
+			if (pinned_blkg)
+				blkg_put(pinned_blkg);
+			blkg_get(blkg);
+			pinned_blkg = blkg;
+
 			spin_unlock_irq(q->queue_lock);
-			goto pd_prealloc;
+
+			if (pd_prealloc)
+				pol->pd_free_fn(pd_prealloc);
+			pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+						       blkg->blkcg);
+			if (pd_prealloc)
+				goto retry;
+			else
+				goto enomem;
 		}
 
 		blkg->pd[pol->plid] = pd;
 		pd->blkg = blkg;
 		pd->plid = pol->plid;
-		if (pol->pd_init_fn)
-			pol->pd_init_fn(pd);
 	}
 
+	/* all allocated, init in the same order */
+	if (pol->pd_init_fn)
+		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+			pol->pd_init_fn(blkg->pd[pol->plid]);
+
 	__set_bit(pol->plid, q->blkcg_pols);
 	ret = 0;
 
 	spin_unlock_irq(q->queue_lock);
-out_bypass_end:
+out:
 	if (q->mq_ops)
 		blk_mq_unfreeze_queue(q);
 	else
 		blk_queue_bypass_end(q);
+	if (pinned_blkg)
+		blkg_put(pinned_blkg);
 	if (pd_prealloc)
 		pol->pd_free_fn(pd_prealloc);
 	return ret;
+
+enomem:
+	/* alloc failed, nothing's initialized yet, free everything */
+	spin_lock_irq(q->queue_lock);
+	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+		if (blkg->pd[pol->plid]) {
+			pol->pd_free_fn(blkg->pd[pol->plid]);
+			blkg->pd[pol->plid] = NULL;
+		}
+	}
+	spin_unlock_irq(q->queue_lock);
+	ret = -ENOMEM;
+	goto out;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
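
As an aside, the GFP_NOWAIT-then-GFP_KERNEL retry in the hunk above reduces to the following standalone sketch (hypothetical helpers; a fixed array stands in for the blkg list, so pinning a slot is just remembering its index, whereas the kernel must take a blkg_get() reference to keep the blkg alive across the unlock; the queue_lock handling is elided):

    #include <stdlib.h>

    #define NR_SLOTS 8

    static void *slot[NR_SLOTS];        /* stands in for blkg->pd[pol->plid] */

    /* The "atomic" allocator may fail, as GFP_NOWAIT can; the "sleeping"
     * one models GFP_KERNEL and is assumed to succeed. */
    static void *alloc_atomic(void)   { return rand() % 4 ? malloc(64) : NULL; }
    static void *alloc_sleeping(void) { return malloc(64); }

    static int activate(void)
    {
            void *prealloc = NULL;
            int pinned = -1;            /* slot the prealloc was made for */
            int i;

    retry:
            for (i = 0; i < NR_SLOTS; i++) {
                    void *pd;

                    if (slot[i])        /* filled on an earlier pass, skip */
                            continue;

                    if (i == pinned) {  /* prealloc matches, consume it */
                            pd = prealloc;
                            prealloc = NULL;
                    } else {
                            pd = alloc_atomic();
                    }

                    if (!pd) {
                            /* atomic alloc failed: free any stale prealloc,
                             * make a sleeping one for this slot and rescan */
                            pinned = i;
                            free(prealloc);
                            prealloc = alloc_sleeping();
                            if (prealloc)
                                    goto retry;
                            return -1;  /* would take the enomem unwind */
                    }
                    slot[i] = pd;
            }

            free(prealloc);             /* leftover prealloc; free(NULL) is ok */
            return 0;
    }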