From: Jinke Han hanjinke.666@bytedance.com
mainline inclusion
from mainline-v6.0-rc1
commit 14a6e2eb7df5c7897c15b109cba29ab0c4a791b6
category: bugfix
bugzilla: 188088, https://gitee.com/openeuler/kernel/issues/I66GIL
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
While testing iocost, we encountered list add/del corruptions of the inner_walk list in ioc_timer_fn.
The reason can be described as follows:
cpu 0                                   cpu 1
ioc_qos_write                           ioc_qos_write

ioc = q_to_ioc(queue);
if (!ioc) {
        ioc = kzalloc();
                                        ioc = q_to_ioc(queue);
                                        if (!ioc) {
                                                ioc = kzalloc();
                                                ...
                                                rq_qos_add(q, rqos);
                                        }
        ...
        rq_qos_add(q, rqos);
        ...
}
When the io.cost.qos file is written by two CPUs concurrently, an rq_qos may be added to the same disk twice. In that case, two iocs are enabled and running on one disk, each owning different iocgs on its active list. In ioc_timer_fn, because the iocgs from the two iocs share the same root iocg, the root iocg's walk_list may be overwritten by either ioc, which leads to list add/del corruptions when building or destroying the inner_walk list.
So far, the blk-rq-qos framework has implicitly assumed that there is only one instance of each rq_qos type per queue. This patch makes that assumption explicit and also fixes the crash described above.
Signed-off-by: Jinke Han <hanjinke.666@bytedance.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220720093616.70584-1-hanjinke.666@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflicts: block/blk-rq-qos.h block/blk-wbt.c
Signed-off-by: Li Nan linan122@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com --- block/blk-iocost.c | 20 +++++++++++++------- block/blk-iolatency.c | 18 +++++++++++------- block/blk-rq-qos.h | 11 ++++++++++- block/blk-wbt.c | 12 +++++++++++- 4 files changed, 45 insertions(+), 16 deletions(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a6e18d6c6c74..81dd3b02b36a 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2874,15 +2874,21 @@ static int blk_iocost_init(struct request_queue *q) * called before policy activation completion, can't assume that the * target bio has an iocg associated and need to test for NULL iocg. */ - rq_qos_add(q, rqos); + ret = rq_qos_add(q, rqos); + if (ret) + goto err_free_ioc; + ret = blkcg_activate_policy(q, &blkcg_policy_iocost); - if (ret) { - rq_qos_del(q, rqos); - free_percpu(ioc->pcpu_stat); - kfree(ioc); - return ret; - } + if (ret) + goto err_del_qos; return 0; + +err_del_qos: + rq_qos_del(q, rqos); +err_free_ioc: + free_percpu(ioc->pcpu_stat); + kfree(ioc); + return ret; }
static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 74511a060d59..9811ee74b69f 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -772,19 +772,23 @@ int blk_iolatency_init(struct request_queue *q) rqos->ops = &blkcg_iolatency_ops; rqos->q = q;
- rq_qos_add(q, rqos); - + ret = rq_qos_add(q, rqos); + if (ret) + goto err_free; ret = blkcg_activate_policy(q, &blkcg_policy_iolatency); - if (ret) { - rq_qos_del(q, rqos); - kfree(blkiolat); - return ret; - } + if (ret) + goto err_qos_del;
timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
return 0; + +err_qos_del: + rq_qos_del(q, rqos); +err_free: + kfree(blkiolat); + return ret; }
static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 2bcb3495e376..37c59d7d6ba7 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -98,7 +98,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait) init_waitqueue_head(&rq_wait->wait); }
-static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) +static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) { /* * No IO can be in-flight when adding rqos, so freeze queue, which @@ -110,6 +110,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_freeze_queue(q);
spin_lock_irq(&q->queue_lock); + if (rq_qos_id(q, rqos->id)) + goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; spin_unlock_irq(&q->queue_lock); @@ -118,6 +120,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
if (rqos->ops->debugfs_attrs) blk_mq_debugfs_register_rqos(rqos); + + return 0; +ebusy: + spin_unlock_irq(&q->queue_lock); + blk_mq_unfreeze_queue(q); + return -EBUSY; + }
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 6f63920f073c..28eb25b947cd 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -818,6 +818,7 @@ int wbt_init(struct request_queue *q) { struct rq_wb *rwb; int i; + int ret;
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); if (!rwb) @@ -847,8 +848,17 @@ int wbt_init(struct request_queue *q) /* * Assign rwb and add the stats callback. */ - rq_qos_add(q, &rwb->rqos); + ret = rq_qos_add(q, &rwb->rqos); + if (ret) + goto err_free; + blk_stat_add_callback(q, rwb->cb);
return 0; + +err_free: + blk_stat_free_callback(rwb->cb); + kfree(rwb); + return ret; + }