From: Ming Lei <ming.lei@redhat.com>
mainline inclusion
from mainline-5.6-rc4
commit 01e99aeca3979600302913cef3f89076786f32c8
category: bugfix
bugzilla: 42777
CVE: NA
---------------------------
For some reason, a device may end up in a state where it can't handle FS requests, so STS_RESOURCE is always returned and the FS request is re-added to hctx->dispatch. A passthrough request may be required at that time to fix the problem. If the passthrough request is added to the scheduler queue, blk-mq never gets a chance to dispatch it, given that we prioritize requests in hctx->dispatch. The FS IO request may then never complete, causing an IO hang.

So the passthrough request has to be added to hctx->dispatch directly to fix the IO hang.

Fix this issue by inserting passthrough requests into hctx->dispatch directly, together with adding FS requests to the tail of hctx->dispatch in blk_mq_dispatch_rq_list(). Note that FS requests are already added to the tail of hctx->dispatch by default; see blk_mq_request_bypass_insert().

This also makes the behaviour consistent with the original legacy IO request path, in which passthrough requests were always added to q->queue_head.
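To make the routing rule concrete, here is a minimal user-space sketch (illustrative only, not kernel code: the enum and bypass_scheduler() are hypothetical stand-ins for the RQF_FLUSH_SEQ flag test and blk_rq_is_passthrough()):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the real request classification */
enum rq_kind { RQ_FS, RQ_FLUSH_SEQ, RQ_PASSTHROUGH };

/*
 * Mirrors the decision in blk_mq_sched_bypass_insert() after this
 * patch: flush-sequence and passthrough requests skip the elevator
 * and go straight to hctx->dispatch.
 */
static bool bypass_scheduler(enum rq_kind kind)
{
	return kind == RQ_FLUSH_SEQ || kind == RQ_PASSTHROUGH;
}

int main(void)
{
	const char *names[] = { "FS", "flush-seq", "passthrough" };

	for (int k = RQ_FS; k <= RQ_PASSTHROUGH; k++)
		printf("%-11s -> %s\n", names[k],
		       bypass_scheduler(k) ? "hctx->dispatch (bypass)"
					   : "scheduler queue");
	return 0;
}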
Cc: Dongli Zhang <dongli.zhang@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflicts:
	block/blk-flush.c
	block/blk-mq.c
	block/blk-mq-sched.c
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/blk-flush.c    |  2 +-
 block/blk-mq-sched.c | 22 +++++++++++++++-------
 block/blk-mq.c       | 18 +++++++++++-------
 block/blk-mq.h       |  3 ++-
 4 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 256fa1ccc2bd..2a8369eb6c1c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -495,7 +495,7 @@ void blk_insert_flush(struct request *rq)
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 		if (q->mq_ops)
-			blk_mq_request_bypass_insert(rq, false);
+			blk_mq_request_bypass_insert(rq, false, false);
 		else
 			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index e98f51b86342..623c258d0b34 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -357,13 +357,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 				       bool has_sched,
 				       struct request *rq)
 {
-	/* dispatch flush rq directly */
-	if (rq->rq_flags & RQF_FLUSH_SEQ) {
-		spin_lock(&hctx->lock);
-		list_add(&rq->queuelist, &hctx->dispatch);
-		spin_unlock(&hctx->lock);
+	/*
+	 * dispatch flush and passthrough rq directly
+	 *
+	 * passthrough request has to be added to hctx->dispatch directly.
+	 * For some reason, device may be in one situation which can't
+	 * handle FS request, so STS_RESOURCE is always returned and the
+	 * FS request will be added to hctx->dispatch. However passthrough
+	 * request may be required at that time for fixing the problem. If
+	 * passthrough request is added to scheduler queue, there isn't any
+	 * chance to dispatch it given we prioritize requests in hctx->dispatch.
+	 */
+	if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
 		return true;
-	}
 
 	if (has_sched)
 		rq->rq_flags |= RQF_SORTED;
@@ -387,8 +393,10 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 
 	WARN_ON(e && (rq->tag != -1));
 
-	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
+	if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+		blk_mq_request_bypass_insert(rq, at_head, false);
 		goto run;
+	}
 
 	if (e && e->type->ops.mq.insert_requests) {
 		LIST_HEAD(list);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 59c92961cc20..a9138235b870 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -721,7 +721,7 @@ static void blk_mq_requeue_work(struct work_struct *work)
 		 * merge.
 		 */
 		if (rq->rq_flags & RQF_DONTPREP)
-			blk_mq_request_bypass_insert(rq, false);
+			blk_mq_request_bypass_insert(rq, false, false);
 		else
 			blk_mq_sched_insert_request(rq, true, false, false);
 	}
@@ -1227,7 +1227,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		bool needs_restart;
 
 		spin_lock(&hctx->lock);
-		list_splice_init(list, &hctx->dispatch);
+		list_splice_tail_init(list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
 
 		/*
@@ -1590,13 +1590,17 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
  * Should only be used carefully, when the caller knows we want to
  * bypass a potential IO scheduler on the target device.
  */
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue)
+void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
+				  bool run_queue)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
 
 	spin_lock(&hctx->lock);
-	list_add_tail(&rq->queuelist, &hctx->dispatch);
+	if (at_head)
+		list_add(&rq->queuelist, &hctx->dispatch);
+	else
+		list_add_tail(&rq->queuelist, &hctx->dispatch);
 	spin_unlock(&hctx->lock);
 
 	if (run_queue)
@@ -1776,7 +1780,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	if (bypass_insert)
 		return BLK_STS_RESOURCE;
 
-	blk_mq_request_bypass_insert(rq, run_queue);
+	blk_mq_request_bypass_insert(rq, false, run_queue);
 	return BLK_STS_OK;
 }
@@ -1792,7 +1796,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
 	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
-		blk_mq_request_bypass_insert(rq, true);
+		blk_mq_request_bypass_insert(rq, false, true);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);
@@ -1827,7 +1831,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		if (ret != BLK_STS_OK) {
 			if (ret == BLK_STS_RESOURCE ||
 					ret == BLK_STS_DEV_RESOURCE) {
-				blk_mq_request_bypass_insert(rq,
+				blk_mq_request_bypass_insert(rq, false,
 						list_empty(list));
 				break;
 			}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index a6094c27b827..debc646e1bed 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -64,7 +64,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  */
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 				bool at_head);
 
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
+void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
+				  bool run_queue);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 				struct list_head *list);
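
For reference, a stand-alone C model of what the new at_head parameter does. This is a user-space sketch, not kernel code: the list helpers are re-implemented locally, and the toy struct request keeps queuelist as its first member so a plain cast stands in for container_of().

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void __list_add(struct list_head *n, struct list_head *prev,
		       struct list_head *next)
{
	next->prev = n;
	n->next = next;
	n->prev = prev;
	prev->next = n;
}

/* list_add(): insert right after head, i.e. at the front of the queue */
static void list_add(struct list_head *n, struct list_head *h)
{
	__list_add(n, h, h->next);
}

/* list_add_tail(): insert right before head, i.e. at the back */
static void list_add_tail(struct list_head *n, struct list_head *h)
{
	__list_add(n, h->prev, h);
}

/* Toy request: queuelist first, so a cast replaces container_of() */
struct request { struct list_head queuelist; const char *name; };

/* Models the list manipulation in blk_mq_request_bypass_insert() */
static void bypass_insert(struct request *rq, int at_head,
			  struct list_head *dispatch)
{
	if (at_head)
		list_add(&rq->queuelist, dispatch);
	else
		list_add_tail(&rq->queuelist, dispatch);
}

int main(void)
{
	struct list_head dispatch;
	struct request fs = { .name = "FS (requeued, tail)" };
	struct request pt = { .name = "passthrough (head)" };

	INIT_LIST_HEAD(&dispatch);
	bypass_insert(&fs, 0, &dispatch);	/* default: tail */
	bypass_insert(&pt, 1, &dispatch);	/* at_head: dispatched first */

	for (struct list_head *p = dispatch.next; p != &dispatch; p = p->next)
		printf("%s\n", ((struct request *)p)->name);
	return 0;
}

Running it prints the passthrough request ahead of the requeued FS request, which is the ordering that unblocks the hang described above; at_head=false keeps the default tail insertion, matching the list_splice_tail_init() change in blk_mq_dispatch_rq_list().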