From: John Garry john.garry@huawei.com
mainline inclusion from mainline-v5.17-rc1 commit fea9f92f1748083cb82049ed503be30c3d3a9b69 category: bugfix bugzilla: 186917, https://gitee.com/openeuler/kernel/issues/I5N1S5 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Kashyap reports high CPU usage in blk_mq_queue_tag_busy_iter() and callees using megaraid SAS RAID card since moving to shared tags [0].
Previously, when shared tags was shared sbitmap, this function was less than optimum since we would iter through all tags for all hctx's, yet only ever match upto tagset depth number of rqs.
Since the change to shared tags, things are even less efficient if we have parallel callers of blk_mq_queue_tag_busy_iter(). This is because in bt_iter() -> blk_mq_find_and_get_req() there would be more contention on accessing each request ref and tags->lock since they are now shared among all HW queues.
Optimise by having separate calls to bt_for_each() for when we're using shared tags. In this case no longer pass a hctx, as it is no longer relevant, and teach bt_iter() about this.
Ming suggested something along the lines of this change, apart from a different implementation.
[0] https://lore.kernel.org/linux-block/e4e92abbe9d52bcba6b8cc6c91c442cc@mail.gm...
Signed-off-by: John Garry john.garry@huawei.com Reviewed-by: Hannes Reinecke hare@suse.de Reviewed-by: Ming Lei ming.lei@redhat.com Reported-and-tested-by: Kashyap Desai kashyap.desai@broadcom.com Fixes: e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support") Link: https://lore.kernel.org/r/1638794990-137490-4-git-send-email-john.garry@huaw... Signed-off-by: Jens Axboe axboe@kernel.dk
Conflict: block/blk-mq-tag.c Signed-off-by: Zhang Wensheng zhangwensheng5@huawei.com Reviewed-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/blk-mq-tag.c | 59 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 18 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index dd291a21bcda..daf7aca4700e 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -259,6 +259,7 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
struct bt_iter_data { struct blk_mq_hw_ctx *hctx; + struct request_queue *q; busy_iter_fn *fn; void *data; bool reserved; @@ -282,11 +283,18 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) { struct bt_iter_data *iter_data = data; struct blk_mq_hw_ctx *hctx = iter_data->hctx; - struct blk_mq_tags *tags = hctx->tags; + struct request_queue *q = iter_data->q; + struct blk_mq_tag_set *set = q->tag_set; bool reserved = iter_data->reserved; + struct blk_mq_tags *tags; struct request *rq; bool ret = true;
+ if (blk_mq_is_sbitmap_shared(set->flags)) + tags = set->shared_sbitmap_tags; + else + tags = hctx->tags; + if (!reserved) bitnr += tags->nr_reserved_tags; /* @@ -297,7 +305,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) if (!rq) return true;
- if (rq->q == hctx->queue && rq->mq_hctx == hctx) + if (rq->q == q && (!hctx || rq->mq_hctx == hctx)) ret = iter_data->fn(hctx, rq, iter_data->data, reserved); blk_mq_put_rq_ref(rq); return ret; @@ -306,6 +314,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) /** * bt_for_each - iterate over the requests associated with a hardware queue * @hctx: Hardware queue to examine. + * @q: Request queue to examine. * @bt: sbitmap to examine. This is either the breserved_tags member * or the bitmap_tags member of struct blk_mq_tags. * @fn: Pointer to the function that will be called for each request @@ -317,14 +326,16 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * @reserved: Indicates whether @bt is the breserved_tags member or the * bitmap_tags member of struct blk_mq_tags. */ -static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt, - busy_iter_fn *fn, void *data, bool reserved) +static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q, + struct sbitmap_queue *bt, busy_iter_fn *fn, + void *data, bool reserved) { struct bt_iter_data iter_data = { .hctx = hctx, .fn = fn, .data = data, .reserved = reserved, + .q = q, };
sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data); @@ -501,9 +512,6 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { - struct blk_mq_hw_ctx *hctx; - int i; - /* * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx * while the queue is frozen. So we can use q_usage_counter to avoid @@ -512,19 +520,34 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, if (!percpu_ref_tryget(&q->q_usage_counter)) return;
- queue_for_each_hw_ctx(q, hctx, i) { - struct blk_mq_tags *tags = hctx->tags; - - /* - * If no software queues are currently mapped to this - * hardware queue, there's nothing to check - */ - if (!blk_mq_hw_queue_mapped(hctx)) - continue; + if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) { + struct blk_mq_tags *tags = q->tag_set->shared_sbitmap_tags; + struct sbitmap_queue *bresv = &tags->breserved_tags; + struct sbitmap_queue *btags = &tags->bitmap_tags;
if (tags->nr_reserved_tags) - bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); - bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); + bt_for_each(NULL, q, bresv, fn, priv, true); + bt_for_each(NULL, q, btags, fn, priv, false); + } else { + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + struct blk_mq_tags *tags = hctx->tags; + struct sbitmap_queue *bresv = &tags->breserved_tags; + struct sbitmap_queue *btags = &tags->bitmap_tags; + + /* + * If no software queues are currently mapped to this + * hardware queue, there's nothing to check + */ + if (!blk_mq_hw_queue_mapped(hctx)) + continue; + + if (tags->nr_reserved_tags) + bt_for_each(hctx, q, bresv, fn, priv, true); + bt_for_each(hctx, q, btags, fn, priv, false); + } } blk_queue_exit(q); }