From: John Garry <john.garry@huawei.com>
mainline inclusion
from mainline-v5.16-rc1
commit e155b0c238b20f0a866f4334d292656665836c8a
category: performance
bugzilla: 186917, https://gitee.com/openeuler/kernel/issues/I5N1S5
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Currently we use separate sbitmap pairs and active_queues atomic_t for shared sbitmap support.
However, a full set of static requests is allocated per HW queue, which is quite wasteful, considering that the total number of requests usable at any given time across all HW queues is limited by the shared sbitmap depth. For example, with 16 HW queues and a shared sbitmap depth of 1024, 16 x 1024 static requests are allocated even though at most 1024 of them can ever be in flight at once.
As such, it is considerably more memory-efficient in the shared sbitmap case to allocate a set of static rqs per tag set or request queue, rather than per HW queue.
So replace the sbitmap pairs and active_queues atomic_t with a shared blk_mq_tags per tag set and per request queue, which holds the set of shared static rqs.
Since there is now no valid HW queue index to pass to the blk_mq_ops .init_request and .exit_request callbacks, pass an invalid index token instead. This changes the semantics of these APIs: a callback must now validate the HW queue index before using it. Currently no user of shared sbitmap actually uses the HW queue index (as would be expected).
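As an illustration only, a converted driver callback would check the index against the BLK_MQ_NO_HCTX_IDX token introduced by this patch before touching any per-hw-queue state. The sketch below is not taken from this patch: the driver name, the foo_dev/foo_cmd types and the hw_queues[] field are hypothetical; only the callback signature, set->driver_data, blk_mq_rq_to_pdu() and BLK_MQ_NO_HCTX_IDX are real.

/* Hypothetical driver callback, shown only to illustrate the new semantics. */
static int foo_init_request(struct blk_mq_tag_set *set, struct request *rq,
			    unsigned int hctx_idx, unsigned int numa_node)
{
	struct foo_dev *fdev = set->driver_data;	/* assumed driver data */
	struct foo_cmd *cmd = blk_mq_rq_to_pdu(rq);

	/*
	 * With a shared sbitmap the static rqs are allocated once per tag
	 * set/request queue, so hctx_idx is BLK_MQ_NO_HCTX_IDX here and must
	 * not be used to index per-hw-queue state.
	 */
	if (hctx_idx != BLK_MQ_NO_HCTX_IDX)
		cmd->hw_queue = fdev->hw_queues[hctx_idx];	/* hypothetical */

	cmd->dev = fdev;
	return 0;
}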
Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/1633429419-228500-13-git-send-email-john.garry@hua...
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Conflict: c6f9c0e2d53d ("blk-mq: allow hardware queue to get more tag while
sharing a tag set") is merged, which will cause lots of conflicts for this
patch, and in the meantime, the functionality will need to be adapted in that
patch.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 block/blk-mq-debugfs.c |  18 --------
 block/blk-mq-sched.c   |  82 ++++++++++++++++-----------------
 block/blk-mq-tag.c     |  75 ++++++++++--------------------
 block/blk-mq-tag.h     |   6 +--
 block/blk-mq.c         | 102 +++++++++++++++++++++--------------------
 block/blk-mq.h         |  15 +++---
 include/linux/blk-mq.h |  16 +++---
 include/linux/blkdev.h |   4 +-
 8 files changed, 133 insertions(+), 185 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index f3a263a1bb43..6eca71417415 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -216,23 +216,6 @@ static int queue_tag_set_show(void *data, struct seq_file *m)
 	seq_printf(m, "numa_node=%d\n", set->numa_node);
 	seq_printf(m, "timeout=%u\n", set->timeout);
 	seq_printf(m, "flags=%u\n", set->flags);
-	seq_printf(m, "active_queues_shared_sbitmap=%d\n",
-		   atomic_read(&set->active_queues_shared_sbitmap));
-	seq_printf(m, "pending_queues_shared_sbitmap=%d\n",
-		   atomic_read(&set->pending_queues_shared_sbitmap));
-
-	return 0;
-}
-
-static int queue_dtag_wait_time_show(void *data, struct seq_file *m)
-{
-	struct request_queue *q = data;
-	unsigned int time = 0;
-
-	if (test_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags))
-		time = jiffies_to_msecs(jiffies - READ_ONCE(q->dtag_wait_time));
-
-	seq_printf(m, "%u\n", time);
 
 	return 0;
 }
@@ -245,7 +228,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
 	{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },
 	{ "tag_set", 0400, queue_tag_set_show, NULL },
-	{ "dtag_wait_time_ms", 0400, queue_dtag_wait_time_show, NULL },
 	{ },
 };
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a382fc31b8a6..36cfae6e1dd3 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -518,6 +518,11 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
 					  struct blk_mq_hw_ctx *hctx,
 					  unsigned int hctx_idx)
 {
+	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
+		hctx->sched_tags = q->shared_sbitmap_tags;
+		return 0;
+	}
+
 	hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
 						    q->nr_requests);
 
@@ -526,61 +531,54 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
 	return 0;
 }
 
+static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
+{
+	blk_mq_free_rq_map(queue->shared_sbitmap_tags);
+	queue->shared_sbitmap_tags = NULL;
+}
+
 /* called in queue's release handler, tagset has gone away */
-static void blk_mq_sched_tags_teardown(struct request_queue *q)
+static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->sched_tags) {
-			blk_mq_free_rq_map(hctx->sched_tags, hctx->flags);
+			if (!blk_mq_is_sbitmap_shared(q->tag_set->flags))
+				blk_mq_free_rq_map(hctx->sched_tags);
 			hctx->sched_tags = NULL;
 		}
 	}
+
+	if (blk_mq_is_sbitmap_shared(flags))
+		blk_mq_exit_sched_shared_sbitmap(q);
 }
 
 static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
 {
 	struct blk_mq_tag_set *set = queue->tag_set;
-	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
-	struct blk_mq_hw_ctx *hctx;
-	int ret, i;
 
 	/*
 	 * Set initial depth at max so that we don't need to reallocate for
 	 * updating nr_requests.
 	 */
-	ret = blk_mq_init_bitmaps(&queue->sched_bitmap_tags,
-				  &queue->sched_breserved_tags,
-				  MAX_SCHED_RQ, set->reserved_tags,
-				  set->numa_node, alloc_policy);
-	if (ret)
-		return ret;
-
-	queue_for_each_hw_ctx(queue, hctx, i) {
-		hctx->sched_tags->bitmap_tags =
-			&queue->sched_bitmap_tags;
-		hctx->sched_tags->breserved_tags =
-			&queue->sched_breserved_tags;
-	}
+	queue->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
+						BLK_MQ_NO_HCTX_IDX,
+						MAX_SCHED_RQ);
+	if (!queue->shared_sbitmap_tags)
+		return -ENOMEM;
 
 	blk_mq_tag_update_sched_shared_sbitmap(queue);
 
 	return 0;
 }
 
-static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
-{
-	sbitmap_queue_free(&queue->sched_bitmap_tags);
-	sbitmap_queue_free(&queue->sched_breserved_tags);
-}
-
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
+	unsigned int i, flags = q->tag_set->flags;
 	struct blk_mq_hw_ctx *hctx;
 	struct elevator_queue *eq;
-	unsigned int i;
 	int ret;
 
 	if (!e) {
@@ -597,21 +595,21 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
 				   BLKDEV_MAX_RQ);
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
+	if (blk_mq_is_sbitmap_shared(flags)) {
+		ret = blk_mq_init_sched_shared_sbitmap(q);
 		if (ret)
-			goto err_free_map_and_rqs;
+			return ret;
 	}
 
-	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
-		ret = blk_mq_init_sched_shared_sbitmap(q);
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
 		if (ret)
 			goto err_free_map_and_rqs;
 	}
 
 	ret = e->ops.init_sched(q, e);
 	if (ret)
-		goto err_free_sbitmap;
+		goto err_free_map_and_rqs;
 
 	blk_mq_debugfs_register_sched(q);
 
@@ -631,12 +629,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	return 0;
 
-err_free_sbitmap:
-	if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
-		blk_mq_exit_sched_shared_sbitmap(q);
 err_free_map_and_rqs:
 	blk_mq_sched_free_rqs(q);
-	blk_mq_sched_tags_teardown(q);
+	blk_mq_sched_tags_teardown(q, flags);
+	q->elevator = NULL;
 	return ret;
 }
 
@@ -650,9 +646,15 @@ void blk_mq_sched_free_rqs(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		if (hctx->sched_tags)
-			blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
+	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
+		blk_mq_free_rqs(q->tag_set, q->shared_sbitmap_tags,
+				BLK_MQ_NO_HCTX_IDX);
+	} else {
+		queue_for_each_hw_ctx(q, hctx, i) {
+			if (hctx->sched_tags)
+				blk_mq_free_rqs(q->tag_set,
+						hctx->sched_tags, i);
+		}
 	}
 }
 
@@ -673,8 +675,6 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 	blk_mq_debugfs_unregister_sched(q);
 	if (e->type->ops.exit_sched)
 		e->type->ops.exit_sched(e);
-	blk_mq_sched_tags_teardown(q);
-	if (blk_mq_is_sbitmap_shared(flags))
-		blk_mq_exit_sched_shared_sbitmap(q);
+	blk_mq_sched_tags_teardown(q, flags);
 	q->elevator = NULL;
 }
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 5acbb4cd265d..6341718c6471 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -28,11 +28,10 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
-		struct blk_mq_tag_set *set = q->tag_set;
 
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
 		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-			atomic_inc(&set->active_queues_shared_sbitmap);
+			atomic_inc(&hctx->tags->active_queues);
 	} else {
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
 		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
@@ -59,14 +58,14 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
-	struct request_queue *q = hctx->queue;
-	struct blk_mq_tag_set *set = q->tag_set;
 
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+		struct request_queue *q = hctx->queue;
+
 		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
 					&q->queue_flags))
 			return;
-		atomic_dec(&set->active_queues_shared_sbitmap);
+		atomic_dec(&tags->active_queues);
 	} else {
 		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return;
@@ -80,12 +79,11 @@ void __blk_mq_dtag_busy(struct blk_mq_hw_ctx *hctx)
 {
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
-		struct blk_mq_tag_set *set = q->tag_set;
 
 		if (!test_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags) &&
 		    !test_and_set_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags)) {
-			WRITE_ONCE(q->dtag_wait_time, jiffies);
-			atomic_inc(&set->pending_queues_shared_sbitmap);
+			WRITE_ONCE(hctx->dtag_wait_time, jiffies);
+			atomic_inc(&hctx->tags->pending_queues);
 		}
 	} else {
 		if (!test_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state) &&
@@ -100,14 +98,13 @@ void __blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx, bool force)
 {
 	struct blk_mq_tags *tags = hctx->tags;
 	struct request_queue *q = hctx->queue;
-	struct blk_mq_tag_set *set = q->tag_set;
 
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
 		if (!test_bit(QUEUE_FLAG_HCTX_WAIT, &q->queue_flags))
 			return;
 
 		if (!force && time_before(jiffies,
-		    READ_ONCE(q->dtag_wait_time) +
+		    READ_ONCE(hctx->dtag_wait_time) +
 		    BLK_MQ_DTAG_WAIT_EXPIRE))
 			return;
 
@@ -115,8 +112,8 @@ void __blk_mq_dtag_idle(struct blk_mq_hw_ctx *hctx, bool force)
 					&q->queue_flags))
 			return;
 
-		WRITE_ONCE(q->dtag_wait_time, jiffies);
-		atomic_dec(&set->pending_queues_shared_sbitmap);
+		WRITE_ONCE(hctx->dtag_wait_time, jiffies);
+		atomic_dec(&tags->pending_queues);
 	} else {
 		if (!test_bit(BLK_MQ_S_DTAG_WAIT, &hctx->state))
 			return;
@@ -577,38 +574,10 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
 	return 0;
 }
 
-int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set)
-{
-	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
-	int i, ret;
-
-	ret = blk_mq_init_bitmaps(&set->__bitmap_tags, &set->__breserved_tags,
-				  set->queue_depth, set->reserved_tags,
-				  set->numa_node, alloc_policy);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		struct blk_mq_tags *tags = set->tags[i];
-
-		tags->bitmap_tags = &set->__bitmap_tags;
-		tags->breserved_tags = &set->__breserved_tags;
-	}
-
-	return 0;
-}
-
-void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
-{
-	sbitmap_queue_free(&set->__bitmap_tags);
-	sbitmap_queue_free(&set->__breserved_tags);
-}
-
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 				     unsigned int reserved_tags,
-				     int node, unsigned int flags)
+				     int node, int alloc_policy)
 {
-	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags);
 	struct blk_mq_tags *tags;
 
 	if (total_tags > BLK_MQ_TAG_MAX) {
@@ -624,9 +593,6 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_reserved_tags = reserved_tags;
 	spin_lock_init(&tags->lock);
 
-	if (blk_mq_is_sbitmap_shared(flags))
-		return tags;
-
 	if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) {
 		kfree(tags);
 		return NULL;
@@ -634,12 +600,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	return tags;
 }
 
-void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags)
+void blk_mq_free_tags(struct blk_mq_tags *tags)
 {
-	if (!blk_mq_is_sbitmap_shared(flags)) {
-		sbitmap_queue_free(tags->bitmap_tags);
-		sbitmap_queue_free(tags->breserved_tags);
-	}
+	sbitmap_queue_free(tags->bitmap_tags);
+	sbitmap_queue_free(tags->breserved_tags);
 	kfree(tags);
 }
 
@@ -670,6 +634,13 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 		if (tdepth > MAX_SCHED_RQ)
 			return -EINVAL;
 
+		/*
+		 * Only the sbitmap needs resizing since we allocated the max
+		 * initially.
+		 */
+		if (blk_mq_is_sbitmap_shared(set->flags))
+			return 0;
+
 		new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth);
 		if (!new)
 			return -ENOMEM;
@@ -690,12 +661,14 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 
 void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size)
 {
-	sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags);
+	struct blk_mq_tags *tags = set->shared_sbitmap_tags;
+
+	sbitmap_queue_resize(&tags->__bitmap_tags, size - set->reserved_tags);
 }
 
 void blk_mq_tag_update_sched_shared_sbitmap(struct request_queue *q)
 {
-	sbitmap_queue_resize(&q->sched_bitmap_tags,
+	sbitmap_queue_resize(q->shared_sbitmap_tags->bitmap_tags,
 			     q->nr_requests - q->tag_set->reserved_tags);
 }
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 5178ea4aa746..2cd9e8f14b4f 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -43,16 +43,14 @@ struct blk_mq_tags {
 
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
 					unsigned int reserved_tags,
-					int node, unsigned int flags);
-extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags);
+					int node, int alloc_policy);
+extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
 			       struct sbitmap_queue *breserved_tags,
 			       unsigned int queue_depth,
 			       unsigned int reserved,
 			       int node, int alloc_policy);
 
-extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set);
-extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set);
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
 extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 			   unsigned int tag);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d7fe8c5c5369..a5c18b303c41 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2474,7 +2474,10 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	struct blk_mq_tags *drv_tags;
 	struct page *page;
 
-	drv_tags = set->tags[hctx_idx];
+	if (blk_mq_is_sbitmap_shared(set->flags))
+		drv_tags = set->shared_sbitmap_tags;
+	else
+		drv_tags = set->tags[hctx_idx];
 
 	if (tags->static_rqs && set->ops->exit_request) {
 		int i;
@@ -2503,21 +2506,20 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	}
 }
 
-void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags)
+void blk_mq_free_rq_map(struct blk_mq_tags *tags)
 {
 	kfree(tags->rqs);
 	tags->rqs = NULL;
 	kfree(tags->static_rqs);
 	tags->static_rqs = NULL;
 
-	blk_mq_free_tags(tags, flags);
+	blk_mq_free_tags(tags);
 }
 
 static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 					       unsigned int hctx_idx,
 					       unsigned int nr_tags,
-					       unsigned int reserved_tags,
-					       unsigned int flags)
+					       unsigned int reserved_tags)
 {
 	struct blk_mq_tags *tags;
 	int node;
@@ -2526,7 +2528,8 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 	if (node == NUMA_NO_NODE)
 		node = set->numa_node;
 
-	tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags);
+	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
+				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
@@ -2534,7 +2537,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
 				 node);
 	if (!tags->rqs) {
-		blk_mq_free_tags(tags, flags);
+		blk_mq_free_tags(tags);
 		return NULL;
 	}
 
@@ -2543,7 +2546,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 					node);
 	if (!tags->static_rqs) {
 		kfree(tags->rqs);
-		blk_mq_free_tags(tags, flags);
+		blk_mq_free_tags(tags);
 		return NULL;
 	}
 
@@ -2993,14 +2996,13 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 	struct blk_mq_tags *tags;
 	int ret;
 
-	tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags,
-				   set->flags);
+	tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags);
 	if (!tags)
 		return NULL;
 
 	ret = blk_mq_alloc_rqs(set, tags, hctx_idx, depth);
 	if (ret) {
-		blk_mq_free_rq_map(tags, set->flags);
+		blk_mq_free_rq_map(tags);
 		return NULL;
 	}
 
@@ -3010,6 +3012,12 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 static bool __blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 				       int hctx_idx)
 {
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		set->tags[hctx_idx] = set->shared_sbitmap_tags;
+
+		return true;
+	}
+
 	set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx,
 						       set->queue_depth);
 
@@ -3020,14 +3028,21 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
 			     struct blk_mq_tags *tags,
 			     unsigned int hctx_idx)
 {
-	unsigned int flags = set->flags;
-
 	if (tags) {
 		blk_mq_free_rqs(set, tags, hctx_idx);
-		blk_mq_free_rq_map(tags, flags);
+		blk_mq_free_rq_map(tags);
 	}
 }
 
+static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
+				      unsigned int hctx_idx)
+{
+	if (!blk_mq_is_sbitmap_shared(set->flags))
+		blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx);
+
+	set->tags[hctx_idx] = NULL;
+}
+
 static void blk_mq_map_swqueue(struct request_queue *q)
 {
 	unsigned int i, j, hctx_idx;
@@ -3105,10 +3120,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 			 * fallback in case of a new remap fails
 			 * allocation
 			 */
-			if (i && set->tags[i]) {
-				blk_mq_free_map_and_rqs(set, set->tags[i], i);
-				set->tags[i] = NULL;
-			}
+			if (i)
+				__blk_mq_free_map_and_rqs(set, i);
 
 			hctx->tags = NULL;
 			continue;
@@ -3482,7 +3495,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	spin_lock_init(&q->requeue_lock);
 
 	q->nr_requests = set->queue_depth;
-	q->dtag_wait_time = jiffies;
 
 	/*
 	 * Default to classic polling
@@ -3526,6 +3538,14 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
 
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		set->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
+						BLK_MQ_NO_HCTX_IDX,
+						set->queue_depth);
+		if (!set->shared_sbitmap_tags)
+			return -ENOMEM;
+	}
+
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		if (!__blk_mq_alloc_map_and_rqs(set, i))
 			goto out_unwind;
@@ -3535,9 +3555,12 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 	return 0;
 
 out_unwind:
-	while (--i >= 0) {
-		blk_mq_free_map_and_rqs(set, set->tags[i], i);
-		set->tags[i] = NULL;
+	while (--i >= 0)
+		__blk_mq_free_map_and_rqs(set, i);
+
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
+					BLK_MQ_NO_HCTX_IDX);
 	}
 
 	return -ENOMEM;
@@ -3712,26 +3735,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (ret)
 		goto out_free_mq_map;
 
-	if (blk_mq_is_sbitmap_shared(set->flags)) {
-		atomic_set(&set->active_queues_shared_sbitmap, 0);
-		atomic_set(&set->pending_queues_shared_sbitmap, 0);
-
-		if (blk_mq_init_shared_sbitmap(set)) {
-			ret = -ENOMEM;
-			goto out_free_mq_rq_maps;
-		}
-	}
-
 	mutex_init(&set->tag_list_lock);
 	INIT_LIST_HEAD(&set->tag_list);
 
 	return 0;
 
-out_free_mq_rq_maps:
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		blk_mq_free_map_and_rqs(set, set->tags[i], i);
-		set->tags[i] = NULL;
-	}
 out_free_mq_map:
 	for (i = 0; i < set->nr_maps; i++) {
 		kfree(set->map[i].mq_map);
@@ -3747,13 +3755,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
 	int i, j;
 
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		blk_mq_free_map_and_rqs(set, set->tags[i], i);
-		set->tags[i] = NULL;
-	}
+	for (i = 0; i < set->nr_hw_queues; i++)
+		__blk_mq_free_map_and_rqs(set, i);
 
-	if (blk_mq_is_sbitmap_shared(set->flags))
-		blk_mq_exit_shared_sbitmap(set);
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
+					BLK_MQ_NO_HCTX_IDX);
+	}
 
 	for (j = 0; j < set->nr_maps; j++) {
 		kfree(set->map[j].mq_map);
@@ -3791,12 +3799,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 		if (hctx->sched_tags) {
 			ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
 						      nr, true);
-			if (blk_mq_is_sbitmap_shared(set->flags)) {
-				hctx->sched_tags->bitmap_tags =
-					&q->sched_bitmap_tags;
-				hctx->sched_tags->breserved_tags =
-					&q->sched_breserved_tags;
-			}
 		} else {
 			ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
 						      false);
@@ -3940,7 +3942,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
 			nr_hw_queues, prev_nr_hw_queues);
 		for (; i < set->nr_hw_queues; i++)
-			blk_mq_free_map_and_rqs(set, set->tags[i], i);
+			__blk_mq_free_map_and_rqs(set, i);
 
 		set->nr_hw_queues = prev_nr_hw_queues;
 		blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 15ce74c266a6..5572277cf9a3 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -54,7 +54,7 @@ void blk_mq_put_rq_ref(struct request *rq);
  */
 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		     unsigned int hctx_idx);
-void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags);
+void blk_mq_free_rq_map(struct blk_mq_tags *tags);
 struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 				unsigned int hctx_idx, unsigned int depth);
 void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
@@ -313,24 +313,21 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	if (bt->sb.depth == 1)
 		return true;
 
+	if (mq_unfair_dtag && !atomic_read(&hctx->tags->pending_queues))
+		return true;
+
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
-		struct blk_mq_tag_set *set = q->tag_set;
 
-		if (mq_unfair_dtag &&
-		    !atomic_read(&set->pending_queues_shared_sbitmap))
-			return true;
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return true;
-		users = atomic_read(&set->active_queues_shared_sbitmap);
 	} else {
-		if (mq_unfair_dtag && !atomic_read(&hctx->tags->pending_queues))
-			return true;
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return true;
-		users = atomic_read(&hctx->tags->active_queues);
 	}
 
+	users = atomic_read(&hctx->tags->active_queues);
+
 	if (!users)
 		return true;
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e4e46229d0eb..1707f217c636 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -248,13 +248,11 @@ enum hctx_type {
  * @flags:	   Zero or more BLK_MQ_F_* flags.
  * @driver_data:   Pointer to data owned by the block driver that created this
  *		   tag set.
- * @active_queues_shared_sbitmap:
- *		   number of active request queues per tag set.
- * @__bitmap_tags: A shared tags sbitmap, used over all hctx's
- * @__breserved_tags:
- *		   A shared reserved tags sbitmap, used over all hctx's
  * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
  *		   elements.
+ * @shared_sbitmap_tags:
+ *		   Shared sbitmap set of tags. Has @nr_hw_queues elements. If
+ *		   set, shared by all @tags.
  * @tag_list_lock: Serializes tag_list accesses.
  * @tag_list:	   List of the request queues that use this tag set. See also
  *		   request_queue.tag_set_list.
@@ -271,13 +269,11 @@ struct blk_mq_tag_set {
 	unsigned int	timeout;
 	unsigned int	flags;
 	void		*driver_data;
-	atomic_t	active_queues_shared_sbitmap;
-	atomic_t	pending_queues_shared_sbitmap;
 
-	struct sbitmap_queue	__bitmap_tags;
-	struct sbitmap_queue	__breserved_tags;
 	struct blk_mq_tags	**tags;
 
+	struct blk_mq_tags	*shared_sbitmap_tags;
+
 	struct mutex		tag_list_lock;
 	struct list_head	tag_list;
 
@@ -472,6 +468,8 @@ enum {
 	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
 		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
+#define BLK_MQ_NO_HCTX_IDX	(-1U)
+
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 		void *queuedata);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 87116833d7ed..3acfadd6724b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -499,8 +499,7 @@ struct request_queue {
 
 	atomic_t		nr_active_requests_shared_sbitmap;
 
-	struct sbitmap_queue	sched_bitmap_tags;
-	struct sbitmap_queue	sched_breserved_tags;
+	struct blk_mq_tags	*shared_sbitmap_tags;
 
 	struct list_head	icq_list;
 #ifdef CONFIG_BLK_CGROUP
@@ -605,7 +604,6 @@ struct request_queue {
 #define BLK_MAX_WRITE_HINTS	5
 	u64			write_hints[BLK_MAX_WRITE_HINTS];
 
-	unsigned long		dtag_wait_time;
 	KABI_RESERVE(1)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)